]> git.saurik.com Git - apple/xnu.git/blob - bsd/ufs/ffs/ffs_vfsops.c
6614aa7406d83d5b9202281b68f66d870a3c1afc
[apple/xnu.git] / bsd / ufs / ffs / ffs_vfsops.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24 /*
25 * Copyright (c) 1989, 1991, 1993, 1994
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
57 */
58
59 #include <rev_endian_fs.h>
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/namei.h>
63 #include <sys/proc.h>
64 #include <sys/kauth.h>
65 #include <sys/kernel.h>
66 #include <sys/vnode_internal.h>
67 #include <sys/socket.h>
68 #include <sys/mount_internal.h>
69 #include <sys/mount.h>
70 #include <sys/buf.h>
71 #include <sys/mbuf.h>
72 #include <sys/file.h>
73 #include <sys/disk.h>
74 #include <sys/ioctl.h>
75 #include <sys/errno.h>
76 #include <sys/malloc.h>
77 #include <sys/ubc.h>
78 #include <sys/quota.h>
79
80 #include <miscfs/specfs/specdev.h>
81
82 #include <ufs/ufs/quota.h>
83 #include <ufs/ufs/ufsmount.h>
84 #include <ufs/ufs/inode.h>
85 #include <ufs/ufs/ufs_extern.h>
86
87 #include <ufs/ffs/fs.h>
88 #include <ufs/ffs/ffs_extern.h>
89 #if REV_ENDIAN_FS
90 #include <ufs/ufs/ufs_byte_order.h>
91 #include <architecture/byte_order.h>
92 #endif /* REV_ENDIAN_FS */
93
94 int ffs_sbupdate(struct ufsmount *, int);
95
/*
 * Operations vector wiring the FFS entry points into the VFS layer.
 * Initializer order must match the slot order of struct vfsops.
 */
struct vfsops ufs_vfsops = {
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_vfs_getattr,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	ffs_vptofh,
	ffs_init,
	ffs_sysctl,
	ffs_vfs_setattr,
	{0}	/* remaining slots unimplemented */
};
112
113 extern u_long nextgennumber;
114
/*
 * Helper union and macros for setting the high or low 32-bit half of a
 * 64-bit quantity without doing 64-bit arithmetic.  _QUAD_HIGHWORD /
 * _QUAD_LOWWORD select the correct array slot for the host endianness.
 */
union _qcvt {
	int64_t qcvt;		/* the 64-bit value */
	int32_t val[2];		/* same storage viewed as two 32-bit words */
};
#define SETHIGH(q, h) { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_HIGHWORD] = (h); \
	(q) = tmp.qcvt; \
}
#define SETLOW(q, l) { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_LOWWORD] = (l); \
	(q) = tmp.qcvt; \
}
131
132 /*
133 * Called by main() when ufs is going to be mounted as root.
134 */
135 int
136 ffs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
137 {
138 struct proc *p = current_proc(); /* XXX */
139 int error;
140
141 /* Set asynchronous flag by default */
142 vfs_setflags(mp, MNT_ASYNC);
143
144 if (error = ffs_mountfs(rvp, mp, context))
145 return (error);
146
147 (void)ffs_statfs(mp, vfs_statfs(mp), NULL);
148
149 return (0);
150 }
151
/*
 * VFS Operations.
 *
 * mount system call
 *
 * Handles both a fresh mount and an MNT_UPDATE of an existing mount:
 * a read-write -> read-only downgrade (flush and close writers), a
 * reload after fsck (MNT_RELOAD), and a read-only -> read-write
 * upgrade (MNTK_WANTRDWR).  Returns 0 on success or an errno value.
 */
int
ffs_mount(struct mount *mp, vnode_t devvp, __unused user_addr_t data, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct ufsmount *ump;
	register struct fs *fs;
	u_int size;
	int error = 0, flags;
	mode_t accessmode;
	int ronly;
	int reload = 0;

	/*
	 * If updating, check whether changing from read-write to
	 * read-only; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			/*
			 * Flush any dirty data.
			 */
			VFS_SYNC(mp, MNT_WAIT, context);
			/*
			 * Check for and optionally get rid of files open
			 * for writing.
			 */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (error = ffs_flushfiles(mp, flags, p))
				return (error);
			fs->fs_clean = 1;
			fs->fs_ronly = 1;
			if (error = ffs_sbupdate(ump, MNT_WAIT)) {
				/* superblock write failed: undo the downgrade */
				fs->fs_clean = 0;
				fs->fs_ronly = 0;
				return (error);
			}
		}
		/* save fs_ronly to later use */
		ronly = fs->fs_ronly;
		if ((mp->mnt_flag & MNT_RELOAD) || ronly)
			reload = 1;
		if ((reload) &&
		    (error = ffs_reload(mp, vfs_context_ucred(context), p)))
			return (error);
		/* replace the ronly after load */
		fs->fs_ronly = ronly;
		/*
		 * Do not update the file system if the user was in singleuser
		 * and then tries to mount -uw without fscking
		 */
		if (!fs->fs_clean && ronly) {
			printf("WARNING: trying to mount a dirty file system\n");
			if (issingleuser() && (mp->mnt_flag & MNT_ROOTFS)) {
				printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",fs->fs_fsmnt);
				/*
				 * Reset the readonly bit as reload might have
				 * modified this bit
				 */
				fs->fs_ronly = 1;
				return(EPERM);
			}
		}

		/* read-only -> read-write upgrade requested by the VFS layer */
		if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
			fs->fs_ronly = 0;
			fs->fs_clean = 0;
			(void) ffs_sbupdate(ump, MNT_WAIT);
		}
		if (devvp == 0) {
			return(0);
		}
	}
	if ((mp->mnt_flag & MNT_UPDATE) == 0)
		error = ffs_mountfs(devvp, mp, context);
	else {
		/* updating: the device must match the one originally mounted */
		if (devvp != ump->um_devvp)
			error = EINVAL;	/* needs translation */
	}
	if (error) {
		return (error);
	}
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	/* record the mount point path in the in-core superblock */
	bzero(fs->fs_fsmnt , sizeof(fs->fs_fsmnt));
	strncpy(fs->fs_fsmnt, (caddr_t)mp->mnt_vfsstat.f_mntonname, sizeof(fs->fs_fsmnt) - 1);
	(void)ffs_statfs(mp, &mp->mnt_vfsstat, p);
	return (0);
}
249
250
/*
 * Argument bundle passed through vnode_iterate() to
 * ffs_reload_callback() for each vnode hung off the mount.
 */
struct ffs_reload_cargs {
	struct vnode *devvp;	/* device vnode to read inode blocks from */
	kauth_cred_t cred;	/* caller's credential (not read by the callback) */
	struct fs *fs;		/* in-core superblock of the mount */
	struct proc *p;		/* calling process (not read by the callback) */
	int error;		/* out: first buf_bread error, 0 if none */
#if REV_ENDIAN_FS
	int rev_endian;		/* nonzero if on-disk data is opposite-endian */
#endif /* REV_ENDIAN_FS */
};
261
262
/*
 * vnode_iterate() callback used by ffs_reload() (step 6 of the reload):
 * invalidate the vnode's cached buffers and re-read its on-disk inode
 * into the in-core inode.  Returns VNODE_RETURNED to continue the
 * iteration, or VNODE_RETURNED_DONE (with args->error set) to stop
 * after a read failure.
 */
static int
ffs_reload_callback(struct vnode *vp, void *cargs)
{
	struct inode *ip;
	struct buf *bp;
	struct fs *fs;
	struct ffs_reload_cargs *args;

	args = (struct ffs_reload_cargs *)cargs;

	/*
	 * flush all the buffers associated with this node
	 */
	if (buf_invalidateblks(vp, 0, 0, 0))
		panic("ffs_reload: dirty2");

	/*
	 * Step 6: re-read inode data
	 */
	ip = VTOI(vp);
	fs = args->fs;

	/* read the filesystem block that holds this vnode's on-disk inode */
	if (args->error = (int)buf_bread(args->devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ip->i_number))),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		buf_brelse(bp);

		return (VNODE_RETURNED_DONE);
	}

#if REV_ENDIAN_FS
	if (args->rev_endian) {
		/* byte-swap the dinode while copying it into the in-core inode */
		byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) +
		    ino_to_fsbo(fs, ip->i_number)), ip);
	} else {
#endif /* REV_ENDIAN_FS */
	/* native endianness: plain structure copy of the dinode */
	ip->i_din = *((struct dinode *)buf_dataptr(bp) +
	    ino_to_fsbo(fs, ip->i_number));
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */

	buf_brelse(bp);

	return (VNODE_RETURNED);
}
308
309
310 /*
311 * Reload all incore data for a filesystem (used after running fsck on
312 * the root filesystem and finding things to fix). The filesystem must
313 * be mounted read-only.
314 *
315 * Things to do to update the mount:
316 * 1) invalidate all cached meta-data.
317 * 2) re-read superblock from disk.
318 * 3) re-read summary information from disk.
319 * 4) invalidate all inactive vnodes.
320 * 5) invalidate all cached file data.
321 * 6) re-read inode data for all active vnodes.
322 */
323 ffs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p)
324 {
325 register struct vnode *devvp;
326 void *space;
327 struct buf *bp;
328 struct fs *fs, *newfs;
329 int i, blks, size, error;
330 u_int64_t maxfilesize; /* XXX */
331 int32_t *lp;
332 struct ffs_reload_cargs args;
333 #if REV_ENDIAN_FS
334 int rev_endian = (mountp->mnt_flag & MNT_REVEND);
335 #endif /* REV_ENDIAN_FS */
336
337 if ((mountp->mnt_flag & MNT_RDONLY) == 0)
338 return (EINVAL);
339 /*
340 * Step 1: invalidate all cached meta-data.
341 */
342 devvp = VFSTOUFS(mountp)->um_devvp;
343 if (buf_invalidateblks(devvp, 0, 0, 0))
344 panic("ffs_reload: dirty1");
345 /*
346 * Step 2: re-read superblock from disk.
347 */
348 size = vfs_devblocksize(mountp);
349
350 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)), SBSIZE, NOCRED,&bp)) {
351 buf_brelse(bp);
352 return (error);
353 }
354 newfs = (struct fs *)buf_dataptr(bp);
355 #if REV_ENDIAN_FS
356 if (rev_endian) {
357 byte_swap_sbin(newfs);
358 }
359 #endif /* REV_ENDIAN_FS */
360 if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
361 newfs->fs_bsize < sizeof(struct fs)) {
362 #if REV_ENDIAN_FS
363 if (rev_endian)
364 byte_swap_sbout(newfs);
365 #endif /* REV_ENDIAN_FS */
366
367 buf_brelse(bp);
368 return (EIO); /* XXX needs translation */
369 }
370 fs = VFSTOUFS(mountp)->um_fs;
371 /*
372 * Copy pointer fields back into superblock before copying in XXX
373 * new superblock. These should really be in the ufsmount. XXX
374 * Note that important parameters (eg fs_ncg) are unchanged.
375 */
376 newfs->fs_csp = fs->fs_csp;
377 newfs->fs_maxcluster = fs->fs_maxcluster;
378 newfs->fs_contigdirs = fs->fs_contigdirs;
379 bcopy(newfs, fs, (u_int)fs->fs_sbsize);
380 if (fs->fs_sbsize < SBSIZE)
381 buf_markinvalid(bp);
382 #if REV_ENDIAN_FS
383 if (rev_endian)
384 byte_swap_sbout(newfs);
385 #endif /* REV_ENDIAN_FS */
386 buf_brelse(bp);
387 mountp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
388 ffs_oldfscompat(fs);
389 maxfilesize = 0x100000000ULL; /* 4GB */
390 if (fs->fs_maxfilesize > maxfilesize) /* XXX */
391 fs->fs_maxfilesize = maxfilesize; /* XXX */
392 /*
393 * Step 3: re-read summary information from disk.
394 */
395 blks = howmany(fs->fs_cssize, fs->fs_fsize);
396 space = fs->fs_csp;
397 for (i = 0; i < blks; i += fs->fs_frag) {
398 size = fs->fs_bsize;
399 if (i + fs->fs_frag > blks)
400 size = (blks - i) * fs->fs_fsize;
401 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)), size,
402 NOCRED, &bp)) {
403 buf_brelse(bp);
404 return (error);
405 }
406 #if REV_ENDIAN_FS
407 if (rev_endian) {
408 /* csum swaps */
409 byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
410 }
411 #endif /* REV_ENDIAN_FS */
412 bcopy((char *)buf_dataptr(bp), space, (u_int)size);
413 #if REV_ENDIAN_FS
414 if (rev_endian) {
415 /* csum swaps */
416 byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
417 }
418 #endif /* REV_ENDIAN_FS */
419 space = (char *) space + size;
420 buf_brelse(bp);
421 }
422 /*
423 * We no longer know anything about clusters per cylinder group.
424 */
425 if (fs->fs_contigsumsize > 0) {
426 lp = fs->fs_maxcluster;
427 for (i = 0; i < fs->fs_ncg; i++)
428 *lp++ = fs->fs_contigsumsize;
429 }
430 #if REV_ENDIAN_FS
431 args.rev_endian = rev_endian;
432 #endif /* REV_ENDIAN_FS */
433 args.devvp = devvp;
434 args.cred = cred;
435 args.fs = fs;
436 args.p = p;
437 args.error = 0;
438 /*
439 * ffs_reload_callback will be called for each vnode
440 * hung off of this mount point that can't be recycled...
441 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
442 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
443 * properly referenced and unreferenced around the callback
444 */
445 vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, ffs_reload_callback, (void *)&args);
446
447 return (args.error);
448 }
449
/*
 * Common code for mount and mountroot
 *
 * Validates the superblock (trying a byte-swapped interpretation when
 * REV_ENDIAN_FS is built in), enforces mount restrictions (block size,
 * fragment size, dirty non-root filesystems), sets the device block
 * size, checks cylinder group 0 for a known overlap corruption, then
 * builds the in-core ufsmount: private superblock copy, summary area,
 * cluster table, and contigdirs array.  Returns 0 on success or an
 * errno; on failure all allocations are released at 'out'.
 */
int
ffs_mountfs(devvp, mp, context)
	struct vnode *devvp;
	struct mount *mp;
	vfs_context_t context;
{
	struct ufsmount *ump;
	struct buf *bp;
	struct fs *fs;
	dev_t dev;
	struct buf *cgbp;
	struct cg *cgp;
	int32_t clustersumoff;
	void *space;
	int error, i, blks, ronly;
	u_int32_t size;
	int32_t *lp;
	kauth_cred_t cred;
	u_int64_t maxfilesize;					/* XXX */
	u_int dbsize = DEV_BSIZE;
#if REV_ENDIAN_FS
	int rev_endian=0;
#endif /* REV_ENDIAN_FS */
	dev = devvp->v_rdev;
	cred = vfs_context_ucred(context);

	ronly = vfs_isrdonly(mp);
	bp = NULL;
	ump = NULL;

	/* Advisory locking should be handled at the VFS layer */
	vfs_setlocklocal(mp);

	/* Obtain the actual device block size */
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&size, 0, context)) {
		error = ENXIO;
		goto out;
	}

	/* Read the superblock off the device */
	if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)),
				   SBSIZE, cred, &bp))
		goto out;
	fs = (struct fs *)buf_dataptr(bp);
#if REV_ENDIAN_FS
	/*
	 * If the superblock doesn't validate as-is, try interpreting it
	 * as byte-swapped (a filesystem written on an opposite-endian
	 * machine); remember the result in rev_endian.
	 */
	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
	    fs->fs_bsize < sizeof(struct fs)) {
		int magic = fs->fs_magic;

		byte_swap_ints(&magic, 1);
		if (magic != FS_MAGIC) {
			error = EINVAL;
			goto out;
		}
		byte_swap_sbin(fs);
		if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
		    fs->fs_bsize < sizeof(struct fs)) {
			byte_swap_sbout(fs);
			error = EINVAL;	/* XXX needs translation */
			goto out;
		}
		rev_endian=1;
	}
#endif /* REV_ENDIAN_FS */
	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
	    fs->fs_bsize < sizeof(struct fs)) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = EINVAL;	/* XXX needs translation */
		goto out;
	}


	/*
	 * Buffer cache does not handle multiple pages in a buf when
	 * invalidating incore buffer in pageout. There are no locks
	 * in the pageout path.  So there is a danger of loosing data when
	 * block allocation happens at the same time a pageout of buddy
	 * page occurs. incore() returns buf with both
	 * pages, this leads vnode-pageout to incorrectly flush of entire.
	 * buf. Till the low level ffs code is modified to deal with these
	 * do not mount any FS more than 4K size.
	 */
	/*
	 * Can't mount filesystems with a fragment size less than DIRBLKSIZ
	 */
	/*
	 * Don't mount dirty filesystems, except for the root filesystem
	 */
	if ((fs->fs_bsize > PAGE_SIZE) || (fs->fs_fsize < DIRBLKSIZ) ||
	    ((!(mp->mnt_flag & MNT_ROOTFS)) && (!fs->fs_clean))) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = ENOTSUP;
		goto out;
	}

	/* Let's figure out the devblock size the file system is with */
	/* the device block size = fragment size / number of sectors per frag */

	dbsize = fs->fs_fsize / NSPF(fs);
	if(dbsize <= 0 ) {
		kprintf("device blocksize computaion failed\n");
	} else {
		if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&dbsize,
		    FWRITE, context) != 0) {
			kprintf("failed to set device blocksize\n");
		}
		/* force the specfs to reread blocksize from size() */
		set_fsblocksize(devvp);
	}

	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = EROFS;          /* needs translation */
		goto out;
	}

	/* If we are not mounting read only, then check for overlap
	 * condition in cylinder group's free block map.
	 * If overlap exists, then force this into a read only mount
	 * to avoid further corruption. PR#2216969
	 */
	if (ronly == 0){
		if (error = (int)buf_bread (devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, 0))),
					    (int)fs->fs_cgsize, NOCRED, &cgbp)) {
			buf_brelse(cgbp);
			goto out;
		}
		cgp = (struct cg *)buf_dataptr(cgbp);
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgin(cgp,fs);
#endif /* REV_ENDIAN_FS */
		if (!cg_chkmagic(cgp)){
			/*
			 * NOTE(review): 'error' is not assigned on this
			 * failure path, so the function returns the prior
			 * value (0 from the successful buf_bread above) —
			 * confirm whether a nonzero errno was intended here.
			 */
#if REV_ENDIAN_FS
			if (rev_endian)
				byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
			buf_brelse(cgbp);
			goto out;
		}
		if (cgp->cg_clustersumoff != 0) {
			/* Check for overlap */
			clustersumoff = cgp->cg_freeoff +
				howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY);
			clustersumoff = roundup(clustersumoff, sizeof(long));
			if (cgp->cg_clustersumoff < clustersumoff) {
				/* Overlap exists */
				mp->mnt_flag |= MNT_RDONLY;
				ronly = 1;
			}
		}
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
		buf_brelse(cgbp);
	}

	/* Build the in-core mount state: ufsmount plus a private superblock copy */
	ump = _MALLOC(sizeof *ump, M_UFSMNT, M_WAITOK);
	bzero((caddr_t)ump, sizeof *ump);
	ump->um_fs = _MALLOC((u_long)fs->fs_sbsize, M_UFSMNT,
	    M_WAITOK);
	bcopy((char *)buf_dataptr(bp), ump->um_fs, (u_int)fs->fs_sbsize);
	if (fs->fs_sbsize < SBSIZE)
		buf_markinvalid(bp);
#if REV_ENDIAN_FS
	if (rev_endian)
		byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
	buf_brelse(bp);
	bp = NULL;
	fs = ump->um_fs;
	fs->fs_ronly = ronly;
	/*
	 * Read the cylinder-group summary area into one allocation that
	 * also holds the cluster table and the contigdirs array.
	 */
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(u_int8_t);
	space = _MALLOC((u_long)size, M_UFSMNT, M_WAITOK);
	fs->fs_csp = space;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
					   size, cred, &bp)) {
			_FREE(fs->fs_csp, M_UFSMNT);
			goto out;
		}
		bcopy((char *)buf_dataptr(bp), space, (u_int)size);
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_ints((int *) space, size / sizeof(int));
#endif /* REV_ENDIAN_FS */
		space = (char *)space + size;
		buf_brelse(bp);
		bp = NULL;
	}
	if (fs->fs_contigsumsize > 0) {
		/* initialize per-cg max cluster counts to the optimistic maximum */
		fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = lp;
	}
	size = fs->fs_ncg * sizeof(u_int8_t);
	fs->fs_contigdirs = (u_int8_t *)space;
	space = (u_int8_t *)space + size;
	bzero(fs->fs_contigdirs, size);
	/* XXX Compatibility for old filesystems */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;
	/* XXX End of compatibility */
	mp->mnt_data = (qaddr_t)ump;
	mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
	mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
	/* XXX warning hardcoded max symlen and not "mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;" */
	mp->mnt_maxsymlinklen = 60;
#if REV_ENDIAN_FS
	if (rev_endian)
		mp->mnt_flag |= MNT_REVEND;
#endif /* REV_ENDIAN_FS */
	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_nindir = fs->fs_nindir;
	ump->um_bptrtodb = fs->fs_fsbtodb;
	ump->um_seqinc = fs->fs_frag;
	for (i = 0; i < MAXQUOTAS; i++)
		dqfileinit(&ump->um_qfiles[i]);
	ffs_oldfscompat(fs);
	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
	maxfilesize = 0x100000000ULL;	/* 4GB */
#if 0
	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
#endif /* 0 */
	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
		fs->fs_maxfilesize = maxfilesize;		/* XXX */
	if (ronly == 0) {
		/* mark the fs dirty while mounted read-write */
		fs->fs_clean = 0;
		(void) ffs_sbupdate(ump, MNT_WAIT);
	}
	return (0);
out:
	if (bp)
		buf_brelse(bp);
	if (ump) {
		_FREE(ump->um_fs, M_UFSMNT);
		_FREE(ump, M_UFSMNT);
	}
	return (error);
}
715
716 /*
717 * Sanity checks for old file systems.
718 *
719 * XXX - goes away some day.
720 */
721 ffs_oldfscompat(fs)
722 struct fs *fs;
723 {
724 int i;
725
726 fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */
727 fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */
728 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */
729 fs->fs_nrpos = 8; /* XXX */
730 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
731 u_int64_t sizepb = fs->fs_bsize; /* XXX */
732 /* XXX */
733 fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */
734 for (i = 0; i < NIADDR; i++) { /* XXX */
735 sizepb *= NINDIR(fs); /* XXX */
736 fs->fs_maxfilesize += sizepb; /* XXX */
737 } /* XXX */
738 fs->fs_qbmask = ~fs->fs_bmask; /* XXX */
739 fs->fs_qfmask = ~fs->fs_fmask; /* XXX */
740 } /* XXX */
741 return (0);
742 }
743
/*
 * unmount system call
 *
 * Flushes all files (forcibly when MNT_FORCE is set), marks the
 * filesystem clean on disk if it was mounted read-write, and frees
 * the in-core summary area, superblock copy, and ufsmount.
 */
int
ffs_unmount(mp, mntflags, context)
	struct mount *mp;
	int mntflags;
	vfs_context_t context;
{
	struct proc *p = vfs_context_proc(context);
	register struct ufsmount *ump;
	register struct fs *fs;
	int error, flags;
	int force;

	flags = 0;
	force = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		force = 1;
	}
	/* a forced unmount proceeds even if the flush fails */
	if ( (error = ffs_flushfiles(mp, flags, p)) && !force )
		return (error);
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;

	if (fs->fs_ronly == 0) {
		/* mark clean so fsck isn't required on the next mount */
		fs->fs_clean = 1;
		if (error = ffs_sbupdate(ump, MNT_WAIT)) {
			fs->fs_clean = 0;
#ifdef notyet
		/* we can atleast cleanup ; as the media could be WP */
		/* & during mount, we do not check for write failures */
		/* FIXME LATER : the Correct fix would be to have */
		/* mount detect the WP media and downgrade to readonly mount */
		/* For now, here it is */
			return (error);
#endif /* notyet */
		}
	}
	/* release all in-core state for the mount */
	_FREE(fs->fs_csp, M_UFSMNT);
	_FREE(fs, M_UFSMNT);
	_FREE(ump, M_UFSMNT);

	return (0);
}
790
791 /*
792 * Flush out all the files in a filesystem.
793 */
794 ffs_flushfiles(mp, flags, p)
795 register struct mount *mp;
796 int flags;
797 struct proc *p;
798 {
799 register struct ufsmount *ump;
800 int i, error;
801
802 ump = VFSTOUFS(mp);
803
804 #if QUOTA
805 /*
806 * NOTE: The open quota files have an indirect reference
807 * on the root directory vnode. We must account for this
808 * extra reference when doing the intial vflush.
809 */
810 if (mp->mnt_flag & MNT_QUOTA) {
811 struct vnode *rootvp = NULLVP;
812 int quotafilecnt = 0;
813
814 /* Find out how many quota files we have open. */
815 for (i = 0; i < MAXQUOTAS; i++) {
816 if (ump->um_qfiles[i].qf_vp != NULLVP)
817 ++quotafilecnt;
818 }
819
820 /*
821 * Check if the root vnode is in our inode hash
822 * (so we can skip over it).
823 */
824 rootvp = ufs_ihashget(ump->um_dev, ROOTINO);
825
826 error = vflush(mp, rootvp, SKIPSYSTEM|flags);
827
828 if (rootvp) {
829 /*
830 * See if there are additional references on the
831 * root vp besides the ones obtained from the open
832 * quota files and the hfs_chashget call above.
833 */
834 if ((error == 0) &&
835 (rootvp->v_usecount > (1 + quotafilecnt))) {
836 error = EBUSY; /* root dir is still open */
837 }
838 vnode_put(rootvp);
839 }
840 if (error && (flags & FORCECLOSE) == 0)
841 return (error);
842
843 for (i = 0; i < MAXQUOTAS; i++) {
844 if (ump->um_qfiles[i].qf_vp == NULLVP)
845 continue;
846 quotaoff(mp, i);
847 }
848 /*
849 * Here we fall through to vflush again to ensure
850 * that we have gotten rid of all the system vnodes.
851 */
852 }
853 #endif
854 error = vflush(mp, NULLVP, SKIPSWAP|flags);
855 error = vflush(mp, NULLVP, flags);
856 return (error);
857 }
858
859 /*
860 * Get file system statistics.
861 */
862 int
863 ffs_statfs(mp, sbp, context)
864 struct mount *mp;
865 register struct vfsstatfs *sbp;
866 vfs_context_t context;
867 {
868 register struct ufsmount *ump;
869 register struct fs *fs;
870
871 ump = VFSTOUFS(mp);
872 fs = ump->um_fs;
873 if (fs->fs_magic != FS_MAGIC)
874 panic("ffs_statfs");
875 sbp->f_bsize = fs->fs_fsize;
876 sbp->f_iosize = fs->fs_bsize;
877 sbp->f_blocks = (uint64_t)((unsigned long)fs->fs_dsize);
878 sbp->f_bfree = (uint64_t) ((unsigned long)(fs->fs_cstotal.cs_nbfree * fs->fs_frag +
879 fs->fs_cstotal.cs_nffree));
880 sbp->f_bavail = (uint64_t) ((unsigned long)freespace(fs, fs->fs_minfree));
881 sbp->f_files = (uint64_t) ((unsigned long)(fs->fs_ncg * fs->fs_ipg - ROOTINO));
882 sbp->f_ffree = (uint64_t) ((unsigned long)fs->fs_cstotal.cs_nifree);
883 return (0);
884 }
885
/*
 * Get volume attributes (VFS_GETATTR entry point).
 *
 * Reports space/inode statistics from the superblock, the fsid, the
 * volume name (read from the UFS label block on the device), and the
 * static capability/attribute sets.  Returns 0, or an errno from the
 * label read.
 */
int
ffs_vfs_getattr(mp, fsap, context)
	struct mount *mp;
	struct vfs_attr *fsap;
	vfs_context_t context;
{
	struct ufsmount *ump;
	struct fs *fs;
	kauth_cred_t cred;
	struct vnode *devvp;
	struct buf *bp;
	struct ufslabel *ulp;
	char *offset;
	int bs, error, length;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	cred = vfs_context_ucred(context);

	/* space/inode statistics straight from the in-core superblock */
	VFSATTR_RETURN(fsap, f_bsize, fs->fs_fsize);
	VFSATTR_RETURN(fsap, f_iosize, fs->fs_bsize);
	VFSATTR_RETURN(fsap, f_blocks, (uint64_t)((unsigned long)fs->fs_dsize));
	VFSATTR_RETURN(fsap, f_bfree, (uint64_t)((unsigned long)
	    (fs->fs_cstotal.cs_nbfree * fs->fs_frag +
	    fs->fs_cstotal.cs_nffree)));
	VFSATTR_RETURN(fsap, f_bavail, (uint64_t)((unsigned long)freespace(fs,
	    fs->fs_minfree)));
	VFSATTR_RETURN(fsap, f_files, (uint64_t)((unsigned long)
	    (fs->fs_ncg * fs->fs_ipg - ROOTINO)));
	VFSATTR_RETURN(fsap, f_ffree, (uint64_t)((unsigned long)
	    fs->fs_cstotal.cs_nifree));

	if (VFSATTR_IS_ACTIVE(fsap, f_fsid)) {
		fsap->f_fsid.val[0] = mp->mnt_vfsstat.f_fsid.val[0];
		fsap->f_fsid.val[1] = mp->mnt_vfsstat.f_fsid.val[1];
		VFSATTR_SET_SUPPORTED(fsap, f_fsid);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		/* the volume name lives in the UFS label block on the device */
		devvp = ump->um_devvp;
		bs = vfs_devblocksize(mp);

		if (error = (int)buf_meta_bread(devvp,
		    (daddr64_t)(UFS_LABEL_OFFSET / bs),
		    MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
			if (bp)
				buf_brelse(bp);
			return (error);
		}

		/*
		 * Since the disklabel is read directly by older user space
		 * code, make sure this buffer won't remain in the cache when
		 * we release it.
		 */
		buf_setflags(bp, B_NOCACHE);

		offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
		ulp = (struct ufslabel *)offset;

		if (ufs_label_check(ulp)) {
			length = ulp->ul_namelen;
#if REV_ENDIAN_FS
			if (mp->mnt_flag & MNT_REVEND)
				length = NXSwapShort(length);
#endif
			if (length > 0 && length <= UFS_MAX_LABEL_NAME) {
				/*
				 * NOTE(review): when length equals
				 * UFS_MAX_LABEL_NAME the second store below
				 * writes at index UFS_MAX_LABEL_NAME — this
				 * assumes f_vol_name is larger than the label
				 * name limit; confirm the buffer size.
				 */
				bcopy(ulp->ul_name, fsap->f_vol_name, length);
				fsap->f_vol_name[UFS_MAX_LABEL_NAME - 1] = '\0';
				fsap->f_vol_name[length] = '\0';
			}
		}

		buf_brelse(bp);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		/* capabilities this FFS implementation actually provides */
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] =
		    VOL_CAP_FMT_SYMBOLICLINKS |
		    VOL_CAP_FMT_HARDLINKS |
		    VOL_CAP_FMT_SPARSE_FILES |
		    VOL_CAP_FMT_CASE_SENSITIVE |
		    VOL_CAP_FMT_CASE_PRESERVING |
		    VOL_CAP_FMT_FAST_STATFS ;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES]
		    = VOL_CAP_INT_NFSEXPORT |
		    VOL_CAP_INT_VOL_RENAME |
		    VOL_CAP_INT_ADVLOCK |
		    VOL_CAP_INT_FLOCK;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1]
		    = 0;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2]
		    = 0;

		/* Capabilities we know about: */
		fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] =
		    VOL_CAP_FMT_PERSISTENTOBJECTIDS |
		    VOL_CAP_FMT_SYMBOLICLINKS |
		    VOL_CAP_FMT_HARDLINKS |
		    VOL_CAP_FMT_JOURNAL |
		    VOL_CAP_FMT_JOURNAL_ACTIVE |
		    VOL_CAP_FMT_NO_ROOT_TIMES |
		    VOL_CAP_FMT_SPARSE_FILES |
		    VOL_CAP_FMT_ZERO_RUNS |
		    VOL_CAP_FMT_CASE_SENSITIVE |
		    VOL_CAP_FMT_CASE_PRESERVING |
		    VOL_CAP_FMT_FAST_STATFS |
		    VOL_CAP_FMT_2TB_FILESIZE;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] =
		    VOL_CAP_INT_SEARCHFS |
		    VOL_CAP_INT_ATTRLIST |
		    VOL_CAP_INT_NFSEXPORT |
		    VOL_CAP_INT_READDIRATTR |
		    VOL_CAP_INT_EXCHANGEDATA |
		    VOL_CAP_INT_COPYFILE |
		    VOL_CAP_INT_ALLOCATE |
		    VOL_CAP_INT_VOL_RENAME |
		    VOL_CAP_INT_ADVLOCK |
		    VOL_CAP_INT_FLOCK ;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0;

		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		/* only volume-level attributes are supported */
		fsap->f_attributes.validattr.commonattr = 0;
		fsap->f_attributes.validattr.volattr =
		    ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
		fsap->f_attributes.validattr.dirattr = 0;
		fsap->f_attributes.validattr.fileattr = 0;
		fsap->f_attributes.validattr.forkattr = 0;

		fsap->f_attributes.nativeattr.commonattr = 0;
		fsap->f_attributes.nativeattr.volattr =
		    ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
		fsap->f_attributes.nativeattr.dirattr = 0;
		fsap->f_attributes.nativeattr.fileattr = 0;
		fsap->f_attributes.nativeattr.forkattr = 0;

		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}

	return (0);
}
1032
1033
/*
 * Set volume attributes (VFS_SETATTR entry point).
 *
 * Only the volume name (f_vol_name) is settable; it is written into
 * the UFS label block on the device and the label checksum is
 * recomputed.  Returns 0, or an errno from the label read.
 */
int
ffs_vfs_setattr(mp, fsap, context)
	struct mount *mp;
	struct vfs_attr *fsap;
	vfs_context_t context;
{
	struct ufsmount *ump;
	struct vnode *devvp;
	struct buf *bp;
	struct ufslabel *ulp;
	kauth_cred_t cred;
	char *offset;
	int bs, error;


	ump = VFSTOUFS(mp);
	cred = vfs_context_ucred(context);

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		/* read the label block that holds the volume name */
		devvp = ump->um_devvp;
		bs = vfs_devblocksize(mp);
		if (error = buf_meta_bread(devvp,
		    (daddr64_t)(UFS_LABEL_OFFSET / bs),
		    MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
			if (bp)
				buf_brelse(bp);
			return (error);
		}

		/*
		 * Since the disklabel is read directly by older user space
		 * code, make sure this buffer won't remain in the cache when
		 * we release it.
		 */
		buf_setflags(bp, B_NOCACHE);

		/* Validate the label structure; init if not valid */
		offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
		ulp = (struct ufslabel *)offset;
		if (!ufs_label_check(ulp))
			ufs_label_init(ulp);

		/* Copy new name over existing name */
		/*
		 * NOTE(review): the new name's length is not validated
		 * against UFS_MAX_LABEL_NAME before the bcopy below, and on
		 * MNT_REVEND mounts ul_namelen is byte-swapped before being
		 * used as the copy length/index — confirm callers bound
		 * f_vol_name appropriately.
		 */
		ulp->ul_namelen = strlen(fsap->f_vol_name);
#if REV_ENDIAN_FS
		if (mp->mnt_flag & MNT_REVEND)
			ulp->ul_namelen = NXSwapShort(ulp->ul_namelen);
#endif
		bcopy(fsap->f_vol_name, ulp->ul_name, ulp->ul_namelen);
		ulp->ul_name[UFS_MAX_LABEL_NAME - 1] = '\0';
		ulp->ul_name[ulp->ul_namelen] = '\0';

		/* Update the checksum */
		ulp->ul_checksum = 0;
		ulp->ul_checksum = ul_cksum(ulp, sizeof(*ulp));

		/* Write the label back to disk */
		buf_bwrite(bp);
		bp = NULL;

		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}

	return (0);
}
/*
 * Argument package handed to ffs_sync_callback() through vnode_iterate():
 * carries the caller's context and wait mode down to each vnode and the
 * first fsync error back up to ffs_sync().
 */
struct ffs_sync_cargs {
	vfs_context_t context;	/* caller's VFS context, passed to VNOP_FSYNC */
	int waitfor;		/* wait mode forwarded to VNOP_FSYNC */
	int error;		/* last error reported by a callback, else 0 */
};
1104
1105
1106 static int
1107 ffs_sync_callback(struct vnode *vp, void *cargs)
1108 {
1109 struct inode *ip;
1110 struct ffs_sync_cargs *args;
1111 int error;
1112
1113 args = (struct ffs_sync_cargs *)cargs;
1114
1115 ip = VTOI(vp);
1116
1117 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) || vnode_hasdirtyblks(vp)) {
1118 error = VNOP_FSYNC(vp, args->waitfor, args->context);
1119
1120 if (error)
1121 args->error = error;
1122
1123 }
1124 return (VNODE_RETURNED);
1125 }
1126
1127 /*
1128 * Go through the disk queues to initiate sandbagged IO;
1129 * go through the inodes to write those that have been modified;
1130 * initiate the writing of the super block if it has been modified.
1131 *
1132 * Note: we are always called with the filesystem marked `MPBUSY'.
1133 */
1134 int
1135 ffs_sync(mp, waitfor, context)
1136 struct mount *mp;
1137 int waitfor;
1138 vfs_context_t context;
1139 {
1140 struct vnode *nvp, *vp;
1141 struct ufsmount *ump = VFSTOUFS(mp);
1142 struct fs *fs;
1143 struct timeval tv;
1144 int error, allerror = 0;
1145 struct ffs_sync_cargs args;
1146
1147 fs = ump->um_fs;
1148 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */
1149 printf("fs = %s\n", fs->fs_fsmnt);
1150 panic("update: rofs mod");
1151 }
1152 /*
1153 * Write back each (modified) inode.
1154 */
1155 args.context = context;
1156 args.waitfor = waitfor;
1157 args.error = 0;
1158 /*
1159 * ffs_sync_callback will be called for each vnode
1160 * hung off of this mount point... the vnode will be
1161 * properly referenced and unreferenced around the callback
1162 */
1163 vnode_iterate(mp, 0, ffs_sync_callback, (void *)&args);
1164
1165 if (args.error)
1166 allerror = args.error;
1167
1168 /*
1169 * Force stale file system control information to be flushed.
1170 */
1171 if (error = VNOP_FSYNC(ump->um_devvp, waitfor, context))
1172 allerror = error;
1173 #if QUOTA
1174 qsync(mp);
1175 #endif
1176 /*
1177 * Write back modified superblock.
1178 */
1179 if (fs->fs_fmod != 0) {
1180 fs->fs_fmod = 0;
1181 microtime(&tv);
1182 fs->fs_time = tv.tv_sec;
1183 if (error = ffs_sbupdate(ump, waitfor))
1184 allerror = error;
1185 }
1186 return (allerror);
1187 }
1188
/*
 * Look up a FFS dinode number to find its incore vnode, otherwise read it
 * in from disk. If it is in core, wait for the lock bit to clear, then
 * return the inode locked. Detection and handling of mount points must be
 * done by the calling routine.
 */
int
ffs_vget(mp, ino, vpp, context)
	mount_t mp;
	ino64_t ino;
	vnode_t *vpp;
	vfs_context_t context;
{
	/*
	 * Thin wrapper: narrow the 64-bit VFS inode number to ino_t and
	 * delegate to ffs_vget_internal() with no parent/name hints and
	 * no file-handle (NFS) semantics.
	 */
	return(ffs_vget_internal(mp, (ino_t)ino, vpp, NULL, NULL, 0, 0));
}
1204
1205
/*
 * Core inode lookup: return in *vpp an iocounted vnode for inode `ino'
 * on mount `mp', either from the inode hash or by reading the dinode
 * from disk and creating a fresh vnode for it.
 *
 * dvp/cnp are optional parent/name hints used for name-cache entry;
 * mode, when non-zero, supplies the file type instead of the on-disk
 * i_mode (presumably for a create in progress — confirm with callers).
 * fhwanted marks an NFS file-handle lookup, which must see ESTALE
 * rather than ENOENT for an unallocated inode.
 *
 * Returns 0 with *vpp set, or an error with *vpp left NULL.
 */
int
ffs_vget_internal(mp, ino, vpp, dvp, cnp, mode, fhwanted)
	mount_t mp;
	ino_t ino;
	vnode_t *vpp;
	vnode_t dvp;
	struct componentname *cnp;
	int mode;
	int fhwanted;
{
	struct proc *p = current_proc();	/* XXX */
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct vnode_fsparam vfsp;
	struct timeval tv;
	enum vtype vtype;
	dev_t dev;
	int i, type, error = 0;

	*vpp = NULL;
	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
#if 0
	/* Check for unmount in progress */
	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		return (EPERM);
	}
#endif
	/*
	 * Allocate a new inode... do it before we check the
	 * cache, because the MALLOC_ZONE may block
	 */
	type = M_FFSNODE;
	MALLOC_ZONE(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);

	/*
	 * check in the inode hash
	 */
	if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
		/*
		 * found it... get rid of the allocation
		 * that we didn't need and return
		 * the 'found' vnode
		 */
		FREE_ZONE(ip, sizeof(struct inode), type);
		vp = *vpp;
		return (0);
	}
	/* Not cached: start from a zeroed inode and fill it from disk */
	bzero((caddr_t)ip, sizeof(struct inode));
	/*
	 * lock the inode
	 */
//	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
//	lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct slock *)0, p);

	ip->i_fs = fs = ump->um_fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#if QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		ip->i_dquot[i] = NODQUOT;
#endif
	/*
	 * Mark the inode as being set up; lookups that find it in the
	 * hash while IN_ALLOC is set will wait and be woken below.
	 */
	SET(ip->i_flag, IN_ALLOC);
	/*
	 * Put it onto its hash chain locked so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ufs_ihashins(ip);

	/* Read in the disk contents for the inode, copy into the inode. */
	if (error = (int)buf_bread(ump->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ino))),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		buf_brelse(bp);
		goto errout;
	}
#if REV_ENDIAN_FS
	/* Opposite-endian mounts swap the dinode as it is copied in */
	if (mp->mnt_flag & MNT_REVEND) {
		byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino)),ip);
	} else {
		ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
	}
#else
	ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
#endif /* REV_ENDIAN_FS */
	buf_brelse(bp);

	/* mode == 0 means "use the on-disk mode"; otherwise the caller
	 * supplies the file type */
	if (mode == 0)
		vtype = IFTOVT(ip->i_mode);
	else
		vtype = IFTOVT(mode);

	if (vtype == VNON) {
		if (fhwanted) {
			/* NFS is in play */
			error = ESTALE;
			goto errout;
		} else {
			error = ENOENT;
			goto errout;
		}
	}

	/* Describe the new vnode for vnode_create() */
	vfsp.vnfs_mp = mp;
	vfsp.vnfs_vtype = vtype;
	vfsp.vnfs_str = "ufs";
	vfsp.vnfs_dvp = dvp;
	vfsp.vnfs_fsnode = ip;
	vfsp.vnfs_cnp = cnp;

	if (mode == 0)
		vfsp.vnfs_filesize = ip->i_din.di_size;
	else
		vfsp.vnfs_filesize = 0;

	/* Select the vnode operations vector by file type */
	if (vtype == VFIFO )
		vfsp.vnfs_vops = FFS_FIFOOPS;
	else if (vtype == VBLK || vtype == VCHR)
		vfsp.vnfs_vops = ffs_specop_p;
	else
		vfsp.vnfs_vops = ffs_vnodeop_p;

	if (vtype == VBLK || vtype == VCHR)
		vfsp.vnfs_rdev = ip->i_rdev;
	else
		vfsp.vnfs_rdev = 0;

	/* Only enter the name cache when the lookup asked for it */
	if (dvp && cnp && (cnp->cn_flags & MAKEENTRY))
		vfsp.vnfs_flags = 0;
	else
		vfsp.vnfs_flags = VNFS_NOCACHE;

	/*
	 * Tag root directory
	 */
	vfsp.vnfs_markroot = (ip->i_number == ROOTINO);
	vfsp.vnfs_marksystem = 0;

	if ((error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp)))
		goto errout;

	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */
	ip->i_devvp = ump->um_devvp;
	ip->i_vnode = vp;

	vnode_ref(ip->i_devvp);
	vnode_addfsref(vp);
	vnode_settag(vp, VT_UFS);

	/*
	 * Initialize modrev times
	 */
	microtime(&tv);
	SETHIGH(ip->i_modrev, tv.tv_sec);
	SETLOW(ip->i_modrev, tv.tv_usec * 4294);

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)tv.tv_sec)
			nextgennumber = tv.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IN_MODIFIED;
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
	}						/* XXX */
	*vpp = vp;

	/* Setup complete: clear the in-allocation flag and wake waiters */
	CLR(ip->i_flag, IN_ALLOC);

	if (ISSET(ip->i_flag, IN_WALLOC))
		wakeup(ip);

	return (0);

errout:
	/* Undo the hash insertion, wake anyone blocked on this inode,
	 * and release the zone allocation */
	ufs_ihashrem(ip);

	if (ISSET(ip->i_flag, IN_WALLOC))
		wakeup(ip);
	FREE_ZONE(ip, sizeof(struct inode), type);

	return (error);
}
1405
1406 /*
1407 * File handle to vnode
1408 *
1409 * Have to be really careful about stale file handles:
1410 * - check that the inode number is valid
1411 * - call vget to get the locked inode
1412 * - check for an unallocated inode (i_mode == 0)
1413 */
1414 int
1415 ffs_fhtovp(mp, fhlen, fhp, vpp, context)
1416 register struct mount *mp;
1417 int fhlen;
1418 unsigned char *fhp;
1419 struct vnode **vpp;
1420 vfs_context_t context;
1421 {
1422 register struct ufid *ufhp;
1423 register struct inode *ip;
1424 struct vnode *nvp;
1425 struct fs *fs;
1426 int error;
1427
1428 if (fhlen < (int)sizeof(struct ufid))
1429 return (EINVAL);
1430 ufhp = (struct ufid *)fhp;
1431 fs = VFSTOUFS(mp)->um_fs;
1432 if (ufhp->ufid_ino < ROOTINO ||
1433 ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1434 return (ESTALE);
1435 error = ffs_vget_internal(mp, ufhp->ufid_ino, &nvp, NULL, NULL, 0, 1);
1436 if (error) {
1437 *vpp = NULLVP;
1438 return (error);
1439 }
1440 ip = VTOI(nvp);
1441 if (ip->i_mode == 0 || ip->i_gen != ufhp->ufid_gen) {
1442 vnode_put(nvp);
1443 *vpp = NULLVP;
1444 return (ESTALE);
1445 }
1446 *vpp = nvp;
1447 return (0);
1448 }
1449
1450 /*
1451 * Vnode pointer to File handle
1452 */
1453 /* ARGSUSED */
1454 int
1455 ffs_vptofh(vp, fhlenp, fhp, context)
1456 struct vnode *vp;
1457 int *fhlenp;
1458 unsigned char *fhp;
1459 vfs_context_t context;
1460 {
1461 register struct inode *ip;
1462 register struct ufid *ufhp;
1463
1464 if (*fhlenp < (int)sizeof(struct ufid))
1465 return (EOVERFLOW);
1466 ip = VTOI(vp);
1467 ufhp = (struct ufid *)fhp;
1468 ufhp->ufid_ino = ip->i_number;
1469 ufhp->ufid_gen = ip->i_gen;
1470 *fhlenp = sizeof(struct ufid);
1471 return (0);
1472 }
1473
/*
 * Initialize the filesystem; just use ufs_init.
 */
int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	/* FFS has no setup of its own; defer to the shared UFS code. */
	return (ufs_init(vfsp));
}
1484
1485 /*
1486 * fast filesystem related variables.
1487 */
1488 ffs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
1489 user_addr_t newp, size_t newlen, vfs_context_t context)
1490 {
1491 extern int doclusterread, doclusterwrite, doreallocblks, doasyncfree;
1492
1493 /* all sysctl names at this level are terminal */
1494 if (namelen != 1)
1495 return (ENOTDIR); /* overloaded */
1496
1497 switch (name[0]) {
1498 case FFS_CLUSTERREAD:
1499 return (sysctl_int(oldp, oldlenp, newp, newlen,
1500 &doclusterread));
1501 case FFS_CLUSTERWRITE:
1502 return (sysctl_int(oldp, oldlenp, newp, newlen,
1503 &doclusterwrite));
1504 case FFS_REALLOCBLKS:
1505 return (sysctl_int(oldp, oldlenp, newp, newlen,
1506 &doreallocblks));
1507 case FFS_ASYNCFREE:
1508 return (sysctl_int(oldp, oldlenp, newp, newlen, &doasyncfree));
1509 default:
1510 return (ENOTSUP);
1511 }
1512 /* NOTREACHED */
1513 }
1514
/*
 * Write a superblock and associated information back to disk.
 *
 * First flushes the cylinder-group summary (csum) blocks, then the
 * superblock itself, applying old-format and opposite-endian
 * compatibility fixups to the on-disk copy only.  If any summary
 * write fails, the superblock is NOT written, so it is not recorded
 * as clean on disk.  Returns 0 or the last write error.
 */
int
ffs_sbupdate(mp, waitfor)
	struct ufsmount *mp;
	int waitfor;
{
	register struct fs *dfs, *fs = mp->um_fs;
	register struct buf *bp;
	int blks;
	void *space;
	int i, size, error, allerror = 0;
	int devBlockSize=0;
#if REV_ENDIAN_FS
	int rev_endian=(mp->um_mountp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	/*
	 * First write back the summary information.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		/* Last chunk may be shorter than a full block */
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
			size, 0, 0, BLK_META);
		bcopy(space, (char *)buf_dataptr(bp), (u_int)size);
#if REV_ENDIAN_FS
		/* Summary data is stored byte-swapped on opposite-endian mounts */
		if (rev_endian) {
			byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
		}
#endif /* REV_ENDIAN_FS */
		space = (char *)space + size;
		/* Async write unless the caller asked to wait for I/O */
		if (waitfor != MNT_WAIT)
			buf_bawrite(bp);
		else if (error = (int)buf_bwrite(bp))
			allerror = error;
	}
	/*
	 * Now write back the superblock itself. If any errors occurred
	 * up to this point, then fail so that the superblock avoids
	 * being written out as clean.
	 */
	if (allerror)
		return (allerror);
	devBlockSize = vfs_devblocksize(mp->um_mountp);

	bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)(SBOFF/devBlockSize)), (int)fs->fs_sbsize, 0, 0, BLK_META);
	bcopy((caddr_t)fs, (char *)buf_dataptr(bp), (u_int)fs->fs_sbsize);
	/* Restore compatibility to old file systems.		   XXX */
	dfs = (struct fs *)buf_dataptr(bp);			/* XXX */
	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
		dfs->fs_nrpos = -1;				/* XXX */
#if REV_ENDIAN_FS
	/*
	 * Swapping bytes here ; so that in case
	 * of inode format < FS_44INODEFMT appropriate
	 * fields get moved
	 */
	if (rev_endian) {
		byte_swap_sbout((struct fs *)buf_dataptr(bp));
	}
#endif /* REV_ENDIAN_FS */
	/*
	 * For pre-4.4 inode formats, rotate the five words starting at
	 * fs_qbmask in the on-disk copy back into their old positions.
	 */
	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
		int32_t *lp, tmp;			/* XXX */
							/* XXX */
		lp = (int32_t *)&dfs->fs_qbmask;	/* XXX */
		tmp = lp[4];				/* XXX */
		for (i = 4; i > 0; i--)			/* XXX */
			lp[i] = lp[i-1];		/* XXX */
		lp[0] = tmp;				/* XXX */
	}						/* XXX */
#if REV_ENDIAN_FS
	/* Note that dfs is already swapped so swap the filesize
	 * before writing
	 */
	if (rev_endian) {
		dfs->fs_maxfilesize = NXSwapLongLong(mp->um_savedmaxfilesize);	/* XXX */
	} else {
#endif /* REV_ENDIAN_FS */
		dfs->fs_maxfilesize = mp->um_savedmaxfilesize;	/* XXX */
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */
	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else if (error = (int)buf_bwrite(bp))
		allerror = error;

	return (allerror);
}