bsd/ufs/ffs/ffs_vfsops.c

   1 /*
   2  * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  29 /*
  30  * Copyright (c) 1989, 1991, 1993, 1994
  31  *      The Regents of the University of California.  All rights reserved.
  32  *
  33  * Redistribution and use in source and binary forms, with or without
  34  * modification, are permitted provided that the following conditions
  35  * are met:
  36  * 1. Redistributions of source code must retain the above copyright
  37  *    notice, this list of conditions and the following disclaimer.
  38  * 2. Redistributions in binary form must reproduce the above copyright
  39  *    notice, this list of conditions and the following disclaimer in the
  40  *    documentation and/or other materials provided with the distribution.
  41  * 3. All advertising materials mentioning features or use of this software
  42  *    must display the following acknowledgement:
  43  *      This product includes software developed by the University of
  44  *      California, Berkeley and its contributors.
  45  * 4. Neither the name of the University nor the names of its contributors
  46  *    may be used to endorse or promote products derived from this software
  47  *    without specific prior written permission.
  48  *
  49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  59  * SUCH DAMAGE.
  60  *
  61  *      @(#)ffs_vfsops.c        8.31 (Berkeley) 5/20/95
  62  */
  63
  64 #include <rev_endian_fs.h>
  65 #include <sys/param.h>
  66 #include <sys/systm.h>
  67 #include <sys/namei.h>
  68 #include <sys/proc.h>
  69 #include <sys/kauth.h>
  70 #include <sys/kernel.h>
  71 #include <sys/vnode_internal.h>
  72 #include <sys/socket.h>
  73 #include <sys/mount_internal.h>
  74 #include <sys/mount.h>
  75 #include <sys/buf.h>
  76 #include <sys/mbuf.h>
  77 #include <sys/file.h>
  78 #include <sys/disk.h>
  79 #include <sys/ioctl.h>
  80 #include <sys/errno.h>
  81 #include <sys/malloc.h>
  82 #include <sys/ubc.h>
  83 #include <sys/quota.h>
  84
  85 #include <miscfs/specfs/specdev.h>
  86
  87 #include <ufs/ufs/quota.h>
  88 #include <ufs/ufs/ufsmount.h>
  89 #include <ufs/ufs/inode.h>
  90 #include <ufs/ufs/ufs_extern.h>
  91
  92 #include <ufs/ffs/fs.h>
  93 #include <ufs/ffs/ffs_extern.h>
  94 #if REV_ENDIAN_FS
  95 #include <ufs/ufs/ufs_byte_order.h>
  96 #include <libkern/OSByteOrder.h>
  97 #endif /* REV_ENDIAN_FS */
  98
/*
 * Forward declaration.  The mount and unmount paths in this file call
 * ffs_sbupdate(ump, MNT_WAIT) and treat a non-zero result as an error code.
 */
int ffs_sbupdate(struct ufsmount *, int);
 100
/*
 * VFS operations vector for UFS/FFS.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of struct vfsops (declared outside this file).  The trailing {0}
 * zero-fills the final aggregate member.
 */
struct vfsops ufs_vfsops = {
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_vfs_getattr,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	ffs_vptofh,
	ffs_init,
	ffs_sysctl,
	ffs_vfs_setattr,
	{0}
};
 117
/*
 * Counter defined outside this file (presumably the source of inode
 * generation numbers -- TODO confirm against its users).
 */
extern u_long nextgennumber;
 119
 120 union _qcvt {
 121         int64_t qcvt;
 122         int32_t val[2];
 123 };
 124 #define SETHIGH(q, h) { \
 125         union _qcvt tmp; \
 126         tmp.qcvt = (q); \
 127         tmp.val[_QUAD_HIGHWORD] = (h); \
 128         (q) = tmp.qcvt; \
 129 }
 130 #define SETLOW(q, l) { \
 131         union _qcvt tmp; \
 132         tmp.qcvt = (q); \
 133         tmp.val[_QUAD_LOWWORD] = (l); \
 134         (q) = tmp.qcvt; \
 135 }
 136
 137 /*
 138  * Called by main() when ufs is going to be mounted as root.
 139  */
 140 int
 141 ffs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
 142 {
 143         struct proc *p = current_proc();        /* XXX */
 144         int     error;
 145
 146         /* Set asynchronous flag by default */
 147         vfs_setflags(mp, MNT_ASYNC);
 148
 149         if (error = ffs_mountfs(rvp, mp, context))
 150                 return (error);
 151
 152         (void)ffs_statfs(mp, vfs_statfs(mp), NULL);
 153
 154         return (0);
 155 }
 156
 157 /*
 158  * VFS Operations.
 159  *
 160  * mount system call
 161  */
 162 int
 163 ffs_mount(struct mount *mp, vnode_t devvp, __unused user_addr_t data,  vfs_context_t context)
 164 {
 165         struct proc *p = vfs_context_proc(context);
 166         struct ufsmount *ump;
 167         register struct fs *fs;
 168         u_int size;
 169         int error  = 0, flags;
 170         mode_t accessmode;
 171         int ronly;
 172         int reload = 0;
 173
 174         /*
 175          * If updating, check whether changing from read-write to
 176          * read-only; if there is no device name, that's all we do.
 177          */
 178         if (mp->mnt_flag & MNT_UPDATE) {
 179                 ump = VFSTOUFS(mp);
 180                 fs = ump->um_fs;
 181                 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
 182                         /*
 183                          * Flush any dirty data.
 184                          */
 185                         VFS_SYNC(mp, MNT_WAIT, context);
 186                         /*
 187                          * Check for and optionally get rid of files open
 188                          * for writing.
 189                          */
 190                         flags = WRITECLOSE;
 191                         if (mp->mnt_flag & MNT_FORCE)
 192                                 flags |= FORCECLOSE;
 193                         if (error = ffs_flushfiles(mp, flags, p))
 194                                 return (error);
 195                         fs->fs_clean = 1;
 196                         fs->fs_ronly = 1;
 197                         if (error = ffs_sbupdate(ump, MNT_WAIT)) {
 198                                 fs->fs_clean = 0;
 199                                 fs->fs_ronly = 0;
 200                                 return (error);
 201                         }
 202                 }
 203                 /* save fs_ronly to later use */
 204                 ronly = fs->fs_ronly;
 205                 if ((mp->mnt_flag & MNT_RELOAD) || ronly)
 206                         reload = 1;
 207                 if ((reload) &&
 208                     (error = ffs_reload(mp, vfs_context_ucred(context), p)))
 209                         return (error);
 210                 /* replace the ronly after load */
 211                 fs->fs_ronly = ronly;
 212                 /*
 213                 * Do not update the file system if the user was in singleuser
 214                 * and then tries to mount -uw without fscking
 215                 */
 216                 if (!fs->fs_clean && ronly) {
 217                         printf("WARNING: trying to mount a dirty file system\n");
 218                         if (issingleuser() && (mp->mnt_flag & MNT_ROOTFS)) {
 219                                 printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",fs->fs_fsmnt);
 220                                 /*
 221                                  * Reset the readonly bit as reload might have
 222                                  * modified this bit
 223                                  */
 224                                 fs->fs_ronly = 1;
 225                                 return(EPERM);
 226                         }
 227                 }
 228
 229                 if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
 230                         fs->fs_ronly = 0;
 231                         fs->fs_clean = 0;
 232                         (void) ffs_sbupdate(ump, MNT_WAIT);
 233                 }
 234                 if (devvp == 0) {
 235                         return(0);
 236                 }
 237         }
 238         if ((mp->mnt_flag & MNT_UPDATE) == 0)
 239                 error = ffs_mountfs(devvp, mp, context);
 240         else {
 241                 if (devvp != ump->um_devvp)
 242                         error = EINVAL; /* needs translation */
 243         }
 244         if (error) {
 245                 return (error);
 246         }
 247         ump = VFSTOUFS(mp);
 248         fs = ump->um_fs;
 249         bzero(fs->fs_fsmnt , sizeof(fs->fs_fsmnt));
 250         strncpy(fs->fs_fsmnt,  (caddr_t)mp->mnt_vfsstat.f_mntonname, sizeof(fs->fs_fsmnt) - 1);
 251         (void)ffs_statfs(mp, &mp->mnt_vfsstat, p);
 252         return (0);
 253 }
 254
 255
/*
 * Argument bundle handed by ffs_reload() to ffs_reload_callback() through
 * vnode_iterate().
 */
struct ffs_reload_cargs {
	struct vnode	*devvp;		/* device vnode the callback reads inode blocks from */
	kauth_cred_t cred;		/* filled in by ffs_reload(); not read by the callback */
	struct fs	*fs;		/* in-core superblock used for block address arithmetic */
	struct proc	*p;		/* filled in by ffs_reload(); not read by the callback */
	int		error;		/* buf_bread() result stored by the callback; returned by ffs_reload() */
#if REV_ENDIAN_FS
	int		rev_endian;	/* non-zero: byte-swap inodes as they are read in */
#endif /* REV_ENDIAN_FS */
};
 266
 267
 268 static int
 269 ffs_reload_callback(struct vnode *vp, void *cargs)
 270 {
 271         struct inode *ip;
 272         struct buf   *bp;
 273         struct fs    *fs;
 274         struct ffs_reload_cargs *args;
 275
 276         args = (struct ffs_reload_cargs *)cargs;
 277
 278         /*
 279          * flush all the buffers associated with this node
 280          */
 281         if (buf_invalidateblks(vp, 0, 0, 0))
 282                 panic("ffs_reload: dirty2");
 283
 284         /*
 285          * Step 6: re-read inode data
 286          */
 287         ip = VTOI(vp);
 288         fs = args->fs;
 289
 290         if (args->error = (int)buf_bread(args->devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ip->i_number))),
 291                                          (int)fs->fs_bsize, NOCRED, &bp)) {
 292                 buf_brelse(bp);
 293
 294                 return (VNODE_RETURNED_DONE);
 295         }
 296
 297 #if REV_ENDIAN_FS
 298         if (args->rev_endian) {
 299                 byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) +
 300                                     ino_to_fsbo(fs, ip->i_number)), ip);
 301         } else {
 302 #endif /* REV_ENDIAN_FS */
 303                 ip->i_din = *((struct dinode *)buf_dataptr(bp) +
 304                               ino_to_fsbo(fs, ip->i_number));
 305 #if REV_ENDIAN_FS
 306         }
 307 #endif /* REV_ENDIAN_FS */
 308
 309         buf_brelse(bp);
 310
 311         return (VNODE_RETURNED);
 312 }
 313
 314
 315 /*
 316  * Reload all incore data for a filesystem (used after running fsck on
 317  * the root filesystem and finding things to fix). The filesystem must
 318  * be mounted read-only.
 319  *
 320  * Things to do to update the mount:
 321  *      1) invalidate all cached meta-data.
 322  *      2) re-read superblock from disk.
 323  *      3) re-read summary information from disk.
 324  *      4) invalidate all inactive vnodes.
 325  *      5) invalidate all cached file data.
 326  *      6) re-read inode data for all active vnodes.
 327  */
 328 ffs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p)
 329 {
 330         register struct vnode *devvp;
 331         void *space;
 332         struct buf *bp;
 333         struct fs *fs, *newfs;
 334         int i, blks, size, error;
 335         u_int64_t maxfilesize;                                  /* XXX */
 336         int32_t *lp;
 337         struct ffs_reload_cargs args;
 338 #if REV_ENDIAN_FS
 339         int rev_endian = (mountp->mnt_flag & MNT_REVEND);
 340 #endif /* REV_ENDIAN_FS */
 341
 342         if ((mountp->mnt_flag & MNT_RDONLY) == 0)
 343                 return (EINVAL);
 344         /*
 345          * Step 1: invalidate all cached meta-data.
 346          */
 347         devvp = VFSTOUFS(mountp)->um_devvp;
 348         if (buf_invalidateblks(devvp, 0, 0, 0))
 349                 panic("ffs_reload: dirty1");
 350         /*
 351          * Step 2: re-read superblock from disk.
 352          */
 353         size = vfs_devblocksize(mountp);
 354
 355         if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)), SBSIZE, NOCRED,&bp)) {
 356                 buf_brelse(bp);
 357                 return (error);
 358         }
 359         newfs = (struct fs *)buf_dataptr(bp);
 360 #if REV_ENDIAN_FS
 361         if (rev_endian) {
 362                 error = byte_swap_sbin(newfs);
 363                 if (error) {
 364                         buf_brelse(bp);
 365                         return (error);
 366                 }
 367         }
 368 #endif /* REV_ENDIAN_FS */
 369         if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
 370             newfs->fs_bsize < sizeof(struct fs)) {
 371 #if REV_ENDIAN_FS
 372                 if (rev_endian)
 373                         byte_swap_sbout(newfs);
 374 #endif /* REV_ENDIAN_FS */
 375
 376                 buf_brelse(bp);
 377                 return (EIO);           /* XXX needs translation */
 378         }
 379         fs = VFSTOUFS(mountp)->um_fs;
 380         /*
 381          * Copy pointer fields back into superblock before copying in   XXX
 382          * new superblock. These should really be in the ufsmount.      XXX
 383          * Note that important parameters (eg fs_ncg) are unchanged.
 384          */
 385         newfs->fs_csp = fs->fs_csp;
 386         newfs->fs_maxcluster = fs->fs_maxcluster;
 387         newfs->fs_contigdirs = fs->fs_contigdirs;
 388         bcopy(newfs, fs, (u_int)fs->fs_sbsize);
 389         if (fs->fs_sbsize < SBSIZE)
 390                 buf_markinvalid(bp);
 391 #if REV_ENDIAN_FS
 392         if (rev_endian)
 393                 byte_swap_sbout(newfs);
 394 #endif /* REV_ENDIAN_FS */
 395         buf_brelse(bp);
 396         mountp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
 397         ffs_oldfscompat(fs);
 398         maxfilesize = 0x100000000ULL;    /* 4GB */
 399         if (fs->fs_maxfilesize > maxfilesize)                   /* XXX */
 400                 fs->fs_maxfilesize = maxfilesize;               /* XXX */
 401         /*
 402          * Step 3: re-read summary information from disk.
 403          */
 404         blks = howmany(fs->fs_cssize, fs->fs_fsize);
 405         space = fs->fs_csp;
 406         for (i = 0; i < blks; i += fs->fs_frag) {
 407                 size = fs->fs_bsize;
 408                 if (i + fs->fs_frag > blks)
 409                         size = (blks - i) * fs->fs_fsize;
 410                 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)), size,
 411                                            NOCRED, &bp)) {
 412                         buf_brelse(bp);
 413                         return (error);
 414                 }
 415 #if REV_ENDIAN_FS
 416                 if (rev_endian) {
 417                         /* csum swaps */
 418                         byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
 419                 }
 420 #endif /* REV_ENDIAN_FS */
 421                 bcopy((char *)buf_dataptr(bp), space, (u_int)size);
 422 #if REV_ENDIAN_FS
 423                 if (rev_endian) {
 424                         /* csum swaps */
 425                         byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
 426                 }
 427 #endif /* REV_ENDIAN_FS */
 428                 space = (char *) space + size;
 429                 buf_brelse(bp);
 430         }
 431         /*
 432          * We no longer know anything about clusters per cylinder group.
 433          */
 434         if (fs->fs_contigsumsize > 0) {
 435                 lp = fs->fs_maxcluster;
 436                 for (i = 0; i < fs->fs_ncg; i++)
 437                         *lp++ = fs->fs_contigsumsize;
 438         }
 439 #if REV_ENDIAN_FS
 440         args.rev_endian = rev_endian;
 441 #endif /* REV_ENDIAN_FS */
 442         args.devvp = devvp;
 443         args.cred = cred;
 444         args.fs = fs;
 445         args.p = p;
 446         args.error = 0;
 447         /*
 448          * ffs_reload_callback will be called for each vnode
 449          * hung off of this mount point that can't be recycled...
 450          * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
 451          * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
 452          * properly referenced and unreferenced around the callback
 453          */
 454         vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, ffs_reload_callback, (void *)&args);
 455
 456         return (args.error);
 457 }
 458
 459 /*
 460  * Common code for mount and mountroot
 461  */
 462 int
 463 ffs_mountfs(devvp, mp, context)
 464         struct vnode *devvp;
 465         struct mount *mp;
 466         vfs_context_t context;
 467 {
 468         struct ufsmount *ump;
 469         struct buf *bp;
 470         struct fs *fs;
 471         dev_t dev;
 472         struct buf *cgbp;
 473         struct cg *cgp;
 474         int32_t clustersumoff;
 475         void *space;
 476         int error, i, blks, ronly;
 477         u_int32_t size;
 478         int32_t *lp;
 479         kauth_cred_t cred;
 480         u_int64_t maxfilesize;                                  /* XXX */
 481         u_int dbsize = DEV_BSIZE;
 482 #if REV_ENDIAN_FS
 483         int rev_endian=0;
 484 #endif /* REV_ENDIAN_FS */
 485         dev = devvp->v_rdev;
 486         cred = vfs_context_ucred(context);
 487
 488         ronly = vfs_isrdonly(mp);
 489         bp  = NULL;
 490         ump = NULL;
 491
 492         /* Advisory locking should be handled at the VFS layer */
 493         vfs_setlocklocal(mp);
 494
 495         /* Obtain the actual device block size */
 496         if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&size, 0, context)) {
 497                 error = ENXIO;
 498                 goto out;
 499         }
 500
 501         if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)),
 502             SBSIZE, cred, &bp))
 503                 goto out;
 504         fs = (struct fs *)buf_dataptr(bp);
 505 #if REV_ENDIAN_FS
 506         if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
 507             fs->fs_bsize < sizeof(struct fs)) {
 508                 int magic = fs->fs_magic;
 509
 510                 byte_swap_ints(&magic, 1);
 511                 if (magic != FS_MAGIC) {
 512                         error = EINVAL;
 513                         goto out;
 514                 }
 515                 if (error = byte_swap_sbin(fs))
 516                         goto out;
 517
 518                 if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
 519                         fs->fs_bsize < sizeof(struct fs)) {
 520                         byte_swap_sbout(fs);
 521                         error = EINVAL;         /* XXX needs translation */
 522                         goto out;
 523                 }
 524                 rev_endian=1;
 525         }
 526 #endif /* REV_ENDIAN_FS */
 527         if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
 528             fs->fs_bsize < sizeof(struct fs)) {
 529 #if REV_ENDIAN_FS
 530                 if (rev_endian)
 531                         byte_swap_sbout(fs);
 532 #endif /* REV_ENDIAN_FS */
 533                 error = EINVAL;         /* XXX needs translation */
 534                 goto out;
 535         }
 536
 537         if (fs->fs_sbsize < 0 || fs->fs_sbsize > SBSIZE) {
 538                 error = EINVAL;
 539                 goto out;
 540         }
 541
 542         /*
 543          * Buffer cache does not handle multiple pages in a buf when
 544
 545         /*
 546          * Buffer cache does not handle multiple pages in a buf when
 547          * invalidating incore buffer in pageout. There are no locks
 548          * in the pageout path.  So there is a danger of loosing data when
 549          * block allocation happens at the same time a pageout of buddy
 550          * page occurs. incore() returns buf with both
 551          * pages, this leads vnode-pageout to incorrectly flush of entire.
 552          * buf. Till the low level ffs code is modified to deal with these
 553          * do not mount any FS more than 4K size.
 554          */
 555         /*
 556          * Can't mount filesystems with a fragment size less than DIRBLKSIZ
 557          */
 558         /*
 559          * Don't mount dirty filesystems, except for the root filesystem
 560          */
 561         if ((fs->fs_bsize > PAGE_SIZE) || (fs->fs_fsize < DIRBLKSIZ) ||
 562         ((!(mp->mnt_flag & MNT_ROOTFS)) && (!fs->fs_clean))) {
 563 #if REV_ENDIAN_FS
 564                 if (rev_endian)
 565                         byte_swap_sbout(fs);
 566 #endif /* REV_ENDIAN_FS */
 567         error = ENOTSUP;
 568         goto out;
 569     }
 570
 571         /* Let's figure out the devblock size the file system is with */
 572         /* the device block size = fragment size / number of sectors per frag */
 573
 574         dbsize = fs->fs_fsize / NSPF(fs);
 575         if(dbsize <= 0 ) {
 576                 kprintf("device blocksize computaion failed\n");
 577         } else {
 578                 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&dbsize,
 579                                 FWRITE, context) != 0) {
 580                         kprintf("failed to set device blocksize\n");
 581                 }
 582                 /* force the specfs to reread blocksize from size() */
 583                 set_fsblocksize(devvp);
 584         }
 585
 586         /* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
 587         if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
 588 #if REV_ENDIAN_FS
 589                 if (rev_endian)
 590                         byte_swap_sbout(fs);
 591 #endif /* REV_ENDIAN_FS */
 592                 error = EROFS;          /* needs translation */
 593                 goto out;
 594         }
 595
 596         /* If we are not mounting read only, then check for overlap
 597          * condition in cylinder group's free block map.
 598          * If overlap exists, then force this into a read only mount
 599          * to avoid further corruption. PR#2216969
 600          */
 601         if (ronly == 0){
 602             if (error = (int)buf_bread (devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, 0))),
 603                                         (int)fs->fs_cgsize, NOCRED, &cgbp)) {
 604                         buf_brelse(cgbp);
 605                         goto out;
 606                 }
 607                 cgp = (struct cg *)buf_dataptr(cgbp);
 608 #if REV_ENDIAN_FS
 609                 if (rev_endian)
 610                         byte_swap_cgin(cgp,fs);
 611 #endif /* REV_ENDIAN_FS */
 612                 if (!cg_chkmagic(cgp)){
 613 #if REV_ENDIAN_FS
 614                                 if (rev_endian)
 615                                         byte_swap_cgout(cgp,fs);
 616 #endif /* REV_ENDIAN_FS */
 617                         buf_brelse(cgbp);
 618                         goto out;
 619                 }
 620                 if (cgp->cg_clustersumoff != 0) {
 621                         /* Check for overlap */
 622                         clustersumoff = cgp->cg_freeoff +
 623                         howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY);
 624                         clustersumoff = roundup(clustersumoff, sizeof(long));
 625                         if (cgp->cg_clustersumoff < clustersumoff) {
 626                         /* Overlap exists */
 627                         mp->mnt_flag |= MNT_RDONLY;
 628                                 ronly = 1;
 629                         }
 630                 }
 631 #if REV_ENDIAN_FS
 632                         if (rev_endian)
 633                                 byte_swap_cgout(cgp,fs);
 634 #endif /* REV_ENDIAN_FS */
 635                         buf_brelse(cgbp);
 636         }
 637
 638         ump = _MALLOC(sizeof *ump, M_UFSMNT, M_WAITOK);
 639         bzero((caddr_t)ump, sizeof *ump);
 640         ump->um_fs = _MALLOC((u_long)fs->fs_sbsize, M_UFSMNT,
 641             M_WAITOK);
 642         bcopy((char *)buf_dataptr(bp), ump->um_fs, (u_int)fs->fs_sbsize);
 643         if (fs->fs_sbsize < SBSIZE)
 644                 buf_markinvalid(bp);
 645 #if REV_ENDIAN_FS
 646         if (rev_endian)
 647                 byte_swap_sbout(fs);
 648 #endif /* REV_ENDIAN_FS */
 649         buf_brelse(bp);
 650         bp = NULL;
 651         fs = ump->um_fs;
 652         fs->fs_ronly = ronly;
 653         if (fs->fs_cssize < 1 || fs->fs_fsize < 1 || fs->fs_ncg < 1) {
 654                 error = EINVAL;
 655                 goto out;
 656         }
 657         if (fs->fs_frag < 1 || fs->fs_frag > MAXFRAG) {
 658                 error = EINVAL;
 659                 goto out;
 660         }
 661
 662         size = fs->fs_cssize;
 663         blks = howmany(size, fs->fs_fsize);
 664         if (fs->fs_contigsumsize > 0) {
 665                 if (fs->fs_ncg > INT_MAX / sizeof(int32_t) || size > INT_MAX - fs->fs_ncg * sizeof(int32_t)) {
 666                         error = EINVAL;
 667                         goto out;
 668                 }
 669                 size += fs->fs_ncg * sizeof(int32_t);
 670         }
 671         if (fs->fs_ncg > INT_MAX / sizeof(u_int8_t) || size > INT_MAX - fs->fs_ncg * sizeof(u_int8_t)) {
 672                 error = EINVAL;
 673                 goto out;
 674         }
 675         size += fs->fs_ncg * sizeof(u_int8_t);
 676         space = _MALLOC((u_long)size, M_UFSMNT, M_WAITOK);
 677         fs->fs_csp = space;
 678         for (i = 0; i < blks; i += fs->fs_frag) {
 679                 size = fs->fs_bsize;
 680                 if (i + fs->fs_frag > blks)
 681                         size = (blks - i) * fs->fs_fsize;
 682                 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
 683                                            size, cred, &bp)) {
 684                         _FREE(fs->fs_csp, M_UFSMNT);
 685                         goto out;
 686                 }
 687                 bcopy((char *)buf_dataptr(bp), space, (u_int)size);
 688 #if REV_ENDIAN_FS
 689                 if (rev_endian)
 690                         byte_swap_ints((int *) space, size / sizeof(int));
 691 #endif /* REV_ENDIAN_FS */
 692                 space = (char *)space + size;
 693                 buf_brelse(bp);
 694                 bp = NULL;
 695         }
 696         if (fs->fs_contigsumsize > 0) {
 697                 fs->fs_maxcluster = lp = space;
 698                 for (i = 0; i < fs->fs_ncg; i++)
 699                         *lp++ = fs->fs_contigsumsize;
 700                 space = lp;
 701         }
 702         size = fs->fs_ncg * sizeof(u_int8_t);
 703         fs->fs_contigdirs = (u_int8_t *)space;
 704         space = (u_int8_t *)space + size;
 705         bzero(fs->fs_contigdirs, size);
 706         /* XXX Compatibility for old filesystems */
 707         if (fs->fs_avgfilesize <= 0)
 708                 fs->fs_avgfilesize = AVFILESIZ;
 709         if (fs->fs_avgfpdir <= 0)
 710                 fs->fs_avgfpdir = AFPDIR;
 711         /* XXX End of compatibility */
 712         mp->mnt_data = (qaddr_t)ump;
 713         mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
 714         mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
 715         /* XXX warning hardcoded max symlen and not "mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;" */
 716         mp->mnt_maxsymlinklen = 60;
 717 #if REV_ENDIAN_FS
 718         if (rev_endian)
 719                 mp->mnt_flag |= MNT_REVEND;
 720 #endif /* REV_ENDIAN_FS */
 721         ump->um_mountp = mp;
 722         ump->um_dev = dev;
 723         ump->um_devvp = devvp;
 724         ump->um_nindir = fs->fs_nindir;
 725         ump->um_bptrtodb = fs->fs_fsbtodb;
 726         ump->um_seqinc = fs->fs_frag;
 727         for (i = 0; i < MAXQUOTAS; i++)
 728                 dqfileinit(&ump->um_qfiles[i]);
 729         ffs_oldfscompat(fs);
 730         ump->um_savedmaxfilesize = fs->fs_maxfilesize;          /* XXX */
 731         maxfilesize = 0x100000000ULL;    /* 4GB */
 732 #if 0
 733         maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1; /* XXX */
 734 #endif /* 0 */
 735         if (fs->fs_maxfilesize > maxfilesize)                   /* XXX */
 736                 fs->fs_maxfilesize = maxfilesize;               /* XXX */
 737         if (ronly == 0) {
 738                 fs->fs_clean = 0;
 739                 (void) ffs_sbupdate(ump, MNT_WAIT);
 740         }
 741         return (0);
 742 out:
 743         if (bp)
 744                 buf_brelse(bp);
 745         if (ump) {
 746                 _FREE(ump->um_fs, M_UFSMNT);
 747                 _FREE(ump, M_UFSMNT);
 748         }
 749         return (error);
 750 }
 751
 752 /*
 753  * Sanity checks for old file systems.
 754  *
 755  * XXX - goes away some day.
 756  */
 757 ffs_oldfscompat(fs)
 758         struct fs *fs;
 759 {
 760         int i;
 761
 762         fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);       /* XXX */
 763         fs->fs_interleave = max(fs->fs_interleave, 1);          /* XXX */
 764         if (fs->fs_postblformat == FS_42POSTBLFMT)              /* XXX */
 765                 fs->fs_nrpos = 8;                               /* XXX */
 766         if (fs->fs_inodefmt < FS_44INODEFMT) {                  /* XXX */
 767                 u_int64_t sizepb = fs->fs_bsize;                /* XXX */
 768                                                                 /* XXX */
 769                 fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */
 770                 for (i = 0; i < NIADDR; i++) {                  /* XXX */
 771                         sizepb *= NINDIR(fs);                   /* XXX */
 772                         fs->fs_maxfilesize += sizepb;           /* XXX */
 773                 }                                               /* XXX */
 774                 fs->fs_qbmask = ~fs->fs_bmask;                  /* XXX */
 775                 fs->fs_qfmask = ~fs->fs_fmask;                  /* XXX */
 776         }                                                       /* XXX */
 777         return (0);
 778 }
 779
 780 /*
 781  * unmount system call
 782  */
 783 int
 784 ffs_unmount(mp, mntflags, context)
 785         struct mount *mp;
 786         int mntflags;
 787         vfs_context_t context;
 788 {
 789         struct proc *p = vfs_context_proc(context);
 790         register struct ufsmount *ump;
 791         register struct fs *fs;
 792         int error, flags;
 793         int force;
 794
 795         flags = 0;
 796         force = 0;
 797         if (mntflags & MNT_FORCE) {
 798                 flags |= FORCECLOSE;
 799                 force = 1;
 800         }
 801         if ( (error = ffs_flushfiles(mp, flags, p)) && !force )
 802                 return (error);
 803         ump = VFSTOUFS(mp);
 804         fs = ump->um_fs;
 805
 806         if (fs->fs_ronly == 0) {
 807                 fs->fs_clean = 1;
 808                 if (error = ffs_sbupdate(ump, MNT_WAIT)) {
 809                         fs->fs_clean = 0;
 810 #ifdef notyet
 811                 /* we can atleast cleanup ; as the media could be WP */
 812                 /* & during mount, we do not check for write failures  */
 813                 /* FIXME LATER : the Correct fix would be to have */
 814                 /* mount detect the WP media and downgrade to readonly mount */
 815                 /* For now, here it is */
 816                         return (error);
 817 #endif /* notyet */
 818                 }
 819         }
 820         _FREE(fs->fs_csp, M_UFSMNT);
 821         _FREE(fs, M_UFSMNT);
 822         _FREE(ump, M_UFSMNT);
 823
 824         return (0);
 825 }
 826
 827 /*
 828  * Flush out all the files in a filesystem.
 829  */
 830 ffs_flushfiles(mp, flags, p)
 831         register struct mount *mp;
 832         int flags;
 833         struct proc *p;
 834 {
 835         register struct ufsmount *ump;
 836         int i, error;
 837
 838         ump = VFSTOUFS(mp);
 839
 840 #if QUOTA
 841         /*
 842          * NOTE: The open quota files have an indirect reference
 843          * on the root directory vnode.  We must account for this
 844          * extra reference when doing the intial vflush.
 845          */
 846         if (mp->mnt_flag & MNT_QUOTA) {
 847                 struct vnode *rootvp = NULLVP;
 848                 int quotafilecnt = 0;
 849
 850                 /* Find out how many quota files we have open. */
 851                 for (i = 0; i < MAXQUOTAS; i++) {
 852                         if (ump->um_qfiles[i].qf_vp != NULLVP)
 853                                 ++quotafilecnt;
 854                 }
 855
 856                 /*
 857                  * Check if the root vnode is in our inode hash
 858                  * (so we can skip over it).
 859                  */
 860                 rootvp = ufs_ihashget(ump->um_dev, ROOTINO);
 861
 862                 error = vflush(mp, rootvp, SKIPSYSTEM|flags);
 863
 864                 if (rootvp) {
 865                         /*
 866                          * See if there are additional references on the
 867                          * root vp besides the ones obtained from the open
 868                          * quota files and the hfs_chashget call above.
 869                          */
 870                         if ((error == 0) &&
 871                             (rootvp->v_usecount > (1 + quotafilecnt))) {
 872                                 error = EBUSY;  /* root dir is still open */
 873                         }
 874                         vnode_put(rootvp);
 875                 }
 876                 if (error && (flags & FORCECLOSE) == 0)
 877                         return (error);
 878
 879                 for (i = 0; i < MAXQUOTAS; i++) {
 880                         if (ump->um_qfiles[i].qf_vp == NULLVP)
 881                                 continue;
 882                         quotaoff(mp, i);
 883                 }
 884                 /*
 885                  * Here we fall through to vflush again to ensure
 886                  * that we have gotten rid of all the system vnodes.
 887                  */
 888         }
 889 #endif
 890         error = vflush(mp, NULLVP, SKIPSWAP|flags);
 891         error = vflush(mp, NULLVP, flags);
 892         return (error);
 893 }
 894
 895 /*
 896  * Get file system statistics.
 897  */
 898 int
 899 ffs_statfs(mp, sbp, context)
 900         struct mount *mp;
 901         register struct vfsstatfs *sbp;
 902         vfs_context_t context;
 903 {
 904         register struct ufsmount *ump;
 905         register struct fs *fs;
 906
 907         ump = VFSTOUFS(mp);
 908         fs = ump->um_fs;
 909         if (fs->fs_magic != FS_MAGIC)
 910                 panic("ffs_statfs");
 911         sbp->f_bsize = fs->fs_fsize;
 912         sbp->f_iosize = fs->fs_bsize;
 913         sbp->f_blocks = (uint64_t)((unsigned long)fs->fs_dsize);
 914         sbp->f_bfree = (uint64_t) ((unsigned long)(fs->fs_cstotal.cs_nbfree * fs->fs_frag +
 915                 fs->fs_cstotal.cs_nffree));
 916         sbp->f_bavail = (uint64_t) ((unsigned long)freespace(fs, fs->fs_minfree));
 917         sbp->f_files =  (uint64_t) ((unsigned long)(fs->fs_ncg * fs->fs_ipg - ROOTINO));
 918         sbp->f_ffree = (uint64_t) ((unsigned long)fs->fs_cstotal.cs_nifree);
 919         return (0);
 920 }
 921
 922 int
 923 ffs_vfs_getattr(mp, fsap, context)
 924         struct mount *mp;
 925         struct vfs_attr *fsap;
 926         vfs_context_t context;
 927 {
 928         struct ufsmount *ump;
 929         struct fs *fs;
 930         kauth_cred_t cred;
 931         struct vnode *devvp;
 932         struct buf *bp;
 933         struct ufslabel *ulp;
 934         char *offset;
 935         int bs, error, length;
 936
 937         ump = VFSTOUFS(mp);
 938         fs = ump->um_fs;
 939         cred = vfs_context_ucred(context);
 940
 941         VFSATTR_RETURN(fsap, f_bsize, fs->fs_fsize);
 942         VFSATTR_RETURN(fsap, f_iosize, fs->fs_bsize);
 943         VFSATTR_RETURN(fsap, f_blocks, (uint64_t)((unsigned long)fs->fs_dsize));
 944         VFSATTR_RETURN(fsap, f_bfree, (uint64_t)((unsigned long)
 945             (fs->fs_cstotal.cs_nbfree * fs->fs_frag +
 946             fs->fs_cstotal.cs_nffree)));
 947         VFSATTR_RETURN(fsap, f_bavail, (uint64_t)((unsigned long)freespace(fs,
 948             fs->fs_minfree)));
 949         VFSATTR_RETURN(fsap, f_files, (uint64_t)((unsigned long)
 950             (fs->fs_ncg * fs->fs_ipg - ROOTINO)));
 951         VFSATTR_RETURN(fsap, f_ffree, (uint64_t)((unsigned long)
 952             fs->fs_cstotal.cs_nifree));
 953
 954         if (VFSATTR_IS_ACTIVE(fsap, f_fsid)) {
 955                 fsap->f_fsid.val[0] = mp->mnt_vfsstat.f_fsid.val[0];
 956                 fsap->f_fsid.val[1] = mp->mnt_vfsstat.f_fsid.val[1];
 957                 VFSATTR_SET_SUPPORTED(fsap, f_fsid);
 958         }
 959
 960         if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
 961                 devvp = ump->um_devvp;
 962                 bs = vfs_devblocksize(mp);
 963
 964                 if (error = (int)buf_meta_bread(devvp,
 965                     (daddr64_t)(UFS_LABEL_OFFSET / bs),
 966                     MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
 967                         if (bp)
 968                                 buf_brelse(bp);
 969                         return (error);
 970                 }
 971
 972                 /*
 973                  * Since the disklabel is read directly by older user space
 974                  * code, make sure this buffer won't remain in the cache when
 975                  * we release it.
 976                  */
 977                 buf_setflags(bp, B_NOCACHE);
 978
 979                 offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
 980                 ulp = (struct ufslabel *)offset;
 981
 982                 if (ufs_label_check(ulp)) {
 983                         length = ulp->ul_namelen;
 984 #if REV_ENDIAN_FS
 985                         if (mp->mnt_flag & MNT_REVEND)
 986                                 length = OSSwapInt16(length);
 987 #endif
 988                         if (length > 0 && length <= UFS_MAX_LABEL_NAME) {
 989                                 bcopy(ulp->ul_name, fsap->f_vol_name, length);
 990                                 fsap->f_vol_name[UFS_MAX_LABEL_NAME - 1] = '\0';
 991                                 fsap->f_vol_name[length] = '\0';
 992                         }
 993                 }
 994
 995                 buf_brelse(bp);
 996                 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
 997         }
 998
 999         if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
1000                 fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] =
1001                     VOL_CAP_FMT_SYMBOLICLINKS |
1002                     VOL_CAP_FMT_HARDLINKS |
1003                     VOL_CAP_FMT_SPARSE_FILES |
1004                     VOL_CAP_FMT_CASE_SENSITIVE |
1005                     VOL_CAP_FMT_CASE_PRESERVING |
1006                     VOL_CAP_FMT_FAST_STATFS ;
1007                 fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES]
1008                     = VOL_CAP_INT_NFSEXPORT |
1009                     VOL_CAP_INT_VOL_RENAME |
1010                     VOL_CAP_INT_ADVLOCK |
1011                     VOL_CAP_INT_FLOCK;
1012                 fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1]
1013                     = 0;
1014                 fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2]
1015                     = 0;
1016
1017                 /* Capabilities we know about: */
1018                 fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] =
1019                     VOL_CAP_FMT_PERSISTENTOBJECTIDS |
1020                     VOL_CAP_FMT_SYMBOLICLINKS |
1021                     VOL_CAP_FMT_HARDLINKS |
1022                     VOL_CAP_FMT_JOURNAL |
1023                     VOL_CAP_FMT_JOURNAL_ACTIVE |
1024                     VOL_CAP_FMT_NO_ROOT_TIMES |
1025                     VOL_CAP_FMT_SPARSE_FILES |
1026                     VOL_CAP_FMT_ZERO_RUNS |
1027                     VOL_CAP_FMT_CASE_SENSITIVE |
1028                     VOL_CAP_FMT_CASE_PRESERVING |
1029                     VOL_CAP_FMT_FAST_STATFS |
1030                     VOL_CAP_FMT_2TB_FILESIZE;
1031                 fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] =
1032                     VOL_CAP_INT_SEARCHFS |
1033                     VOL_CAP_INT_ATTRLIST |
1034                     VOL_CAP_INT_NFSEXPORT |
1035                     VOL_CAP_INT_READDIRATTR |
1036                     VOL_CAP_INT_EXCHANGEDATA |
1037                     VOL_CAP_INT_COPYFILE |
1038                     VOL_CAP_INT_ALLOCATE |
1039                     VOL_CAP_INT_VOL_RENAME |
1040                     VOL_CAP_INT_ADVLOCK |
1041                     VOL_CAP_INT_FLOCK ;
1042                 fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0;
1043                 fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0;
1044
1045                 VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
1046         }
1047
1048         if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
1049                 fsap->f_attributes.validattr.commonattr = 0;
1050                 fsap->f_attributes.validattr.volattr =
1051                     ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
1052                 fsap->f_attributes.validattr.dirattr = 0;
1053                 fsap->f_attributes.validattr.fileattr = 0;
1054                 fsap->f_attributes.validattr.forkattr = 0;
1055
1056                 fsap->f_attributes.nativeattr.commonattr = 0;
1057                 fsap->f_attributes.nativeattr.volattr =
1058                     ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
1059                 fsap->f_attributes.nativeattr.dirattr = 0;
1060                 fsap->f_attributes.nativeattr.fileattr = 0;
1061                 fsap->f_attributes.nativeattr.forkattr = 0;
1062
1063                 VFSATTR_SET_SUPPORTED(fsap, f_attributes);
1064         }
1065
1066         return (0);
1067 }
1068
1069
1070 int
1071 ffs_vfs_setattr(mp, fsap, context)
1072         struct mount *mp;
1073         struct vfs_attr *fsap;
1074         vfs_context_t context;
1075 {
1076         struct ufsmount *ump;
1077         struct vnode *devvp;
1078         struct buf *bp;
1079         struct ufslabel *ulp;
1080         kauth_cred_t cred;
1081         char *offset;
1082         int bs, error;
1083
1084
1085         ump = VFSTOUFS(mp);
1086         cred = vfs_context_ucred(context);
1087
1088         if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
1089                 devvp = ump->um_devvp;
1090                 bs = vfs_devblocksize(mp);
1091                 if (error = buf_meta_bread(devvp,
1092                     (daddr64_t)(UFS_LABEL_OFFSET / bs),
1093                     MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
1094                         if (bp)
1095                                 buf_brelse(bp);
1096                         return (error);
1097                 }
1098
1099                 /*
1100                  * Since the disklabel is read directly by older user space
1101                  * code, make sure this buffer won't remain in the cache when
1102                  * we release it.
1103                  */
1104                 buf_setflags(bp, B_NOCACHE);
1105
1106                 /* Validate the label structure; init if not valid */
1107                 offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
1108                 ulp = (struct ufslabel *)offset;
1109                 if (!ufs_label_check(ulp))
1110                         ufs_label_init(ulp);
1111
1112                 /* Copy new name over existing name */
1113                 ulp->ul_namelen = strlen(fsap->f_vol_name);
1114                 bcopy(fsap->f_vol_name, ulp->ul_name, ulp->ul_namelen);
1115                 ulp->ul_name[UFS_MAX_LABEL_NAME - 1] = '\0';
1116                 ulp->ul_name[ulp->ul_namelen] = '\0';
1117
1118 #if REV_ENDIAN_FS
1119                 if (mp->mnt_flag & MNT_REVEND)
1120                         ulp->ul_namelen = OSSwapInt16(ulp->ul_namelen);
1121 #endif
1122
1123                 /* Update the checksum */
1124                 ulp->ul_checksum = 0;
1125                 ulp->ul_checksum = ul_cksum(ulp, sizeof(*ulp));
1126
1127                 /* Write the label back to disk */
1128                 buf_bwrite(bp);
1129                 bp = NULL;
1130
1131                 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
1132         }
1133
1134         return (0);
1135  }
1136 struct ffs_sync_cargs {
1137         vfs_context_t context;
1138         int    waitfor;
1139         int    error;
1140 };
1141
1142
1143 static int
1144 ffs_sync_callback(struct vnode *vp, void *cargs)
1145 {
1146         struct inode *ip;
1147         struct ffs_sync_cargs *args;
1148         int error;
1149
1150         args = (struct ffs_sync_cargs *)cargs;
1151
1152         ip = VTOI(vp);
1153
1154         if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) || vnode_hasdirtyblks(vp)) {
1155                 error = VNOP_FSYNC(vp, args->waitfor, args->context);
1156
1157                 if (error)
1158                         args->error = error;
1159
1160         }
1161         return (VNODE_RETURNED);
1162 }
1163
1164 /*
1165  * Go through the disk queues to initiate sandbagged IO;
1166  * go through the inodes to write those that have been modified;
1167  * initiate the writing of the super block if it has been modified.
1168  *
1169  * Note: we are always called with the filesystem marked `MPBUSY'.
1170  */
1171 int
1172 ffs_sync(mp, waitfor, context)
1173         struct mount *mp;
1174         int waitfor;
1175         vfs_context_t context;
1176 {
1177         struct vnode *nvp, *vp;
1178         struct ufsmount *ump = VFSTOUFS(mp);
1179         struct fs *fs;
1180         struct timeval tv;
1181         int error, allerror = 0;
1182         struct ffs_sync_cargs args;
1183
1184         fs = ump->um_fs;
1185         if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {            /* XXX */
1186                 printf("fs = %s\n", fs->fs_fsmnt);
1187                 panic("update: rofs mod");
1188         }
1189         /*
1190          * Write back each (modified) inode.
1191          */
1192         args.context = context;
1193         args.waitfor = waitfor;
1194         args.error = 0;
1195         /*
1196          * ffs_sync_callback will be called for each vnode
1197          * hung off of this mount point... the vnode will be
1198          * properly referenced and unreferenced around the callback
1199          */
1200         vnode_iterate(mp, 0, ffs_sync_callback, (void *)&args);
1201
1202         if (args.error)
1203                 allerror = args.error;
1204
1205         /*
1206          * Force stale file system control information to be flushed.
1207          */
1208         if (error = VNOP_FSYNC(ump->um_devvp, waitfor, context))
1209                 allerror = error;
1210 #if QUOTA
1211         qsync(mp);
1212 #endif
1213         /*
1214          * Write back modified superblock.
1215          */
1216         if (fs->fs_fmod != 0) {
1217                 fs->fs_fmod = 0;
1218                 microtime(&tv);
1219                 fs->fs_time = tv.tv_sec;
1220                 if (error = ffs_sbupdate(ump, waitfor))
1221                         allerror = error;
1222         }
1223         return (allerror);
1224 }
1225
1226 /*
1227  * Look up a FFS dinode number to find its incore vnode, otherwise read it
1228  * in from disk.  If it is in core, wait for the lock bit to clear, then
1229  * return the inode locked.  Detection and handling of mount points must be
1230  * done by the calling routine.
1231  */
1232 int
1233 ffs_vget(mp, ino, vpp, context)
1234         mount_t mp;
1235         ino64_t ino;
1236         vnode_t *vpp;
1237         vfs_context_t context;
1238 {
1239         return(ffs_vget_internal(mp, (ino_t)ino, vpp, NULL, NULL, 0, 0));
1240 }
1241
1242
1243 int
1244 ffs_vget_internal(mp, ino, vpp, dvp, cnp, mode, fhwanted)
1245         mount_t mp;
1246         ino_t   ino;
1247         vnode_t *vpp;
1248         vnode_t dvp;
1249         struct  componentname *cnp;
1250         int     mode;
1251         int     fhwanted;
1252 {
1253         struct proc *p = current_proc();                /* XXX */
1254         struct fs *fs;
1255         struct inode *ip;
1256         struct ufsmount *ump;
1257         struct buf *bp;
1258         struct vnode *vp;
1259         struct vnode_fsparam vfsp;
1260         struct timeval tv;
1261         enum vtype vtype;
1262         dev_t dev;
1263         int i, type, error = 0;
1264
1265         *vpp = NULL;
1266         ump  = VFSTOUFS(mp);
1267         dev  = ump->um_dev;
1268 #if 0
1269         /* Check for unmount in progress */
1270         if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
1271                 return (EPERM);
1272         }
1273 #endif
1274         /*
1275          * Allocate a new inode... do it before we check the
1276          * cache, because the MALLOC_ZONE may block
1277          */
1278         type = M_FFSNODE;
1279         MALLOC_ZONE(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);
1280
1281         /*
1282          * check in the inode hash
1283          */
1284         if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
1285                /*
1286                 * found it... get rid of the allocation
1287                 * that we didn't need and return
1288                 * the 'found' vnode
1289                 */
1290                 FREE_ZONE(ip, sizeof(struct inode), type);
1291                 vp = *vpp;
1292                 return (0);
1293         }
1294         bzero((caddr_t)ip, sizeof(struct inode));
1295         /*
1296          * lock the inode
1297          */
1298 //      lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
1299 //      lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct slock *)0, p);
1300
1301         ip->i_fs = fs = ump->um_fs;
1302         ip->i_dev = dev;
1303         ip->i_number = ino;
1304 #if QUOTA
1305         for (i = 0; i < MAXQUOTAS; i++)
1306                 ip->i_dquot[i] = NODQUOT;
1307 #endif
1308         SET(ip->i_flag, IN_ALLOC);
1309         /*
1310          * Put it onto its hash chain locked so that other requests for
1311          * this inode will block if they arrive while we are sleeping waiting
1312          * for old data structures to be purged or for the contents of the
1313          * disk portion of this inode to be read.
1314          */
1315         ufs_ihashins(ip);
1316
1317         /* Read in the disk contents for the inode, copy into the inode. */
1318         if (error = (int)buf_bread(ump->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ino))),
1319                                    (int)fs->fs_bsize, NOCRED, &bp)) {
1320                 buf_brelse(bp);
1321                 goto errout;
1322         }
1323 #if REV_ENDIAN_FS
1324         if (mp->mnt_flag & MNT_REVEND) {
1325                 byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino)),ip);
1326         } else {
1327                 ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
1328         }
1329 #else
1330         ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
1331 #endif /* REV_ENDIAN_FS */
1332         buf_brelse(bp);
1333
1334         if (mode == 0)
1335                 vtype = IFTOVT(ip->i_mode);
1336         else
1337                 vtype = IFTOVT(mode);
1338
1339         if (vtype == VNON) {
1340                 if (fhwanted) {
1341                         /* NFS is in play */
1342                         error = ESTALE;
1343                         goto errout;
1344                 } else {
1345                         error = ENOENT;
1346                         goto errout;
1347                 }
1348         }
1349
1350         vfsp.vnfs_mp = mp;
1351         vfsp.vnfs_vtype = vtype;
1352         vfsp.vnfs_str = "ufs";
1353         vfsp.vnfs_dvp = dvp;
1354         vfsp.vnfs_fsnode = ip;
1355         vfsp.vnfs_cnp = cnp;
1356
1357         if (mode == 0)
1358                 vfsp.vnfs_filesize = ip->i_din.di_size;
1359         else
1360                 vfsp.vnfs_filesize = 0;
1361
1362         if (vtype == VFIFO )
1363                 vfsp.vnfs_vops = FFS_FIFOOPS;
1364         else if (vtype == VBLK || vtype == VCHR)
1365                 vfsp.vnfs_vops = ffs_specop_p;
1366         else
1367                 vfsp.vnfs_vops = ffs_vnodeop_p;
1368
1369         if (vtype == VBLK || vtype == VCHR)
1370                 vfsp.vnfs_rdev = ip->i_rdev;
1371         else
1372                 vfsp.vnfs_rdev = 0;
1373
1374         if (dvp && cnp && (cnp->cn_flags & MAKEENTRY))
1375                 vfsp.vnfs_flags = 0;
1376         else
1377                 vfsp.vnfs_flags = VNFS_NOCACHE;
1378
1379         /*
1380          * Tag root directory
1381          */
1382         vfsp.vnfs_markroot = (ip->i_number == ROOTINO);
1383         vfsp.vnfs_marksystem = 0;
1384
1385         if ((error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp)))
1386                 goto errout;
1387
1388         /*
1389          * Finish inode initialization now that aliasing has been resolved.
1390          */
1391         ip->i_devvp = ump->um_devvp;
1392         ip->i_vnode = vp;
1393
1394         vnode_ref(ip->i_devvp);
1395         vnode_addfsref(vp);
1396         vnode_settag(vp, VT_UFS);
1397
1398         /*
1399          * Initialize modrev times
1400          */
1401         microtime(&tv);
1402         SETHIGH(ip->i_modrev, tv.tv_sec);
1403         SETLOW(ip->i_modrev, tv.tv_usec * 4294);
1404
1405         /*
1406          * Set up a generation number for this inode if it does not
1407          * already have one. This should only happen on old filesystems.
1408          */
1409         if (ip->i_gen == 0) {
1410                 if (++nextgennumber < (u_long)tv.tv_sec)
1411                         nextgennumber = tv.tv_sec;
1412                 ip->i_gen = nextgennumber;
1413                 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1414                         ip->i_flag |= IN_MODIFIED;
1415         }
1416         /*
1417          * Ensure that uid and gid are correct. This is a temporary
1418          * fix until fsck has been changed to do the update.
1419          */
1420         if (fs->fs_inodefmt < FS_44INODEFMT) {          /* XXX */
1421                 ip->i_uid = ip->i_din.di_ouid;          /* XXX */
1422                 ip->i_gid = ip->i_din.di_ogid;          /* XXX */
1423         }                                               /* XXX */
1424         *vpp = vp;
1425
1426         CLR(ip->i_flag, IN_ALLOC);
1427
1428         if (ISSET(ip->i_flag, IN_WALLOC))
1429                 wakeup(ip);
1430
1431         return (0);
1432
1433 errout:
1434         ufs_ihashrem(ip);
1435
1436         if (ISSET(ip->i_flag, IN_WALLOC))
1437                 wakeup(ip);
1438         FREE_ZONE(ip, sizeof(struct inode), type);
1439
1440         return (error);
1441 }
1442
1443 /*
1444  * File handle to vnode
1445  *
1446  * Have to be really careful about stale file handles:
1447  * - check that the inode number is valid
1448  * - call vget to get the locked inode
1449  * - check for an unallocated inode (i_mode == 0)
1450  */
1451 int
1452 ffs_fhtovp(mp, fhlen, fhp, vpp, context)
1453         register struct mount *mp;
1454         int fhlen;
1455         unsigned char *fhp;
1456         struct vnode **vpp;
1457         vfs_context_t context;
1458 {
1459         register struct ufid *ufhp;
1460         register struct inode *ip;
1461         struct vnode *nvp;
1462         struct fs *fs;
1463         int error;
1464         ino_t     ino;
1465
1466         if (fhlen < (int)sizeof(struct ufid))
1467                 return (EINVAL);
1468         ufhp = (struct ufid *)fhp;
1469         fs = VFSTOUFS(mp)->um_fs;
1470         ino = ntohl(ufhp->ufid_ino);
1471         if (ino < ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg)
1472                 return (ESTALE);
1473         error = ffs_vget_internal(mp, ino, &nvp, NULL, NULL, 0, 1);
1474         if (error) {
1475                 *vpp = NULLVP;
1476                 return (error);
1477         }
1478         ip = VTOI(nvp);
1479         if (ip->i_mode == 0 || ip->i_gen != ntohl(ufhp->ufid_gen)) {
1480                 vnode_put(nvp);
1481                 *vpp = NULLVP;
1482                 return (ESTALE);
1483         }
1484         *vpp = nvp;
1485         return (0);
1486 }
1487
1488 /*
1489  * Vnode pointer to File handle
1490  */
1491 /* ARGSUSED */
1492 int
1493 ffs_vptofh(vp, fhlenp, fhp, context)
1494         struct vnode *vp;
1495         int *fhlenp;
1496         unsigned char *fhp;
1497         vfs_context_t context;
1498 {
1499         register struct inode *ip;
1500         register struct ufid *ufhp;
1501
1502         if (*fhlenp < (int)sizeof(struct ufid))
1503                 return (EOVERFLOW);
1504         ip = VTOI(vp);
1505         ufhp = (struct ufid *)fhp;
1506         ufhp->ufid_ino = htonl(ip->i_number);
1507         ufhp->ufid_gen = htonl(ip->i_gen);
1508         *fhlenp = sizeof(struct ufid);
1509         return (0);
1510 }
1511
/*
 * Initialize the filesystem.
 *
 * All the work is delegated to ufs_init(); its result is returned as is.
 */
int
ffs_init(vfsp)
        struct vfsconf *vfsp;
{
        int rc;

        rc = ufs_init(vfsp);
        return (rc);
}
1522
1523 /*
1524  * fast filesystem related variables.
1525  */
1526 ffs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
1527                    user_addr_t newp, size_t newlen, vfs_context_t context)
1528 {
1529         extern int doclusterread, doclusterwrite, doreallocblks, doasyncfree;
1530
1531         /* all sysctl names at this level are terminal */
1532         if (namelen != 1)
1533                 return (ENOTDIR);               /* overloaded */
1534
1535         switch (name[0]) {
1536         case FFS_CLUSTERREAD:
1537                 return (sysctl_int(oldp, oldlenp, newp, newlen,
1538                     &doclusterread));
1539         case FFS_CLUSTERWRITE:
1540                 return (sysctl_int(oldp, oldlenp, newp, newlen,
1541                     &doclusterwrite));
1542         case FFS_REALLOCBLKS:
1543                 return (sysctl_int(oldp, oldlenp, newp, newlen,
1544                     &doreallocblks));
1545         case FFS_ASYNCFREE:
1546                 return (sysctl_int(oldp, oldlenp, newp, newlen, &doasyncfree));
1547         default:
1548                 return (ENOTSUP);
1549         }
1550         /* NOTREACHED */
1551 }
1552
1553 /*
1554  * Write a superblock and associated information back to disk.
1555  */
1556 int
1557 ffs_sbupdate(mp, waitfor)
1558         struct ufsmount *mp;
1559         int waitfor;
1560 {
1561         register struct fs *dfs, *fs = mp->um_fs;
1562         register struct buf *bp;
1563         int blks;
1564         void *space;
1565         int i, size, error, allerror = 0;
1566         int devBlockSize=0;
1567 #if REV_ENDIAN_FS
1568         int rev_endian=(mp->um_mountp->mnt_flag & MNT_REVEND);
1569 #endif /* REV_ENDIAN_FS */
1570
1571         /*
1572          * First write back the summary information.
1573          */
1574         blks = howmany(fs->fs_cssize, fs->fs_fsize);
1575         space = fs->fs_csp;
1576         for (i = 0; i < blks; i += fs->fs_frag) {
1577                 size = fs->fs_bsize;
1578                 if (i + fs->fs_frag > blks)
1579                         size = (blks - i) * fs->fs_fsize;
1580                 bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
1581                                 size, 0, 0, BLK_META);
1582                 bcopy(space, (char *)buf_dataptr(bp), (u_int)size);
1583 #if REV_ENDIAN_FS
1584                 if (rev_endian) {
1585                         byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
1586                 }
1587 #endif /* REV_ENDIAN_FS */
1588                 space = (char *)space + size;
1589                 if (waitfor != MNT_WAIT)
1590                         buf_bawrite(bp);
1591                 else if (error = (int)buf_bwrite(bp))
1592                         allerror = error;
1593         }
1594         /*
1595          * Now write back the superblock itself. If any errors occurred
1596          * up to this point, then fail so that the superblock avoids
1597          * being written out as clean.
1598          */
1599         if (allerror)
1600                 return (allerror);
1601         devBlockSize = vfs_devblocksize(mp->um_mountp);
1602
1603         bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)(SBOFF/devBlockSize)), (int)fs->fs_sbsize, 0, 0, BLK_META);
1604         bcopy((caddr_t)fs, (char *)buf_dataptr(bp), (u_int)fs->fs_sbsize);
1605         /* Restore compatibility to old file systems.              XXX */
1606         dfs = (struct fs *)buf_dataptr(bp);                     /* XXX */
1607         if (fs->fs_postblformat == FS_42POSTBLFMT)              /* XXX */
1608                 dfs->fs_nrpos = -1;                             /* XXX */
1609 #if REV_ENDIAN_FS
1610         /*
1611         *  Swapping bytes here ; so that in case
1612         *   of inode format < FS_44INODEFMT appropriate
1613         *   fields get moved
1614         */
1615         if (rev_endian) {
1616                 byte_swap_sbout((struct fs *)buf_dataptr(bp));
1617         }
1618 #endif /* REV_ENDIAN_FS */
1619         if (fs->fs_inodefmt < FS_44INODEFMT) {                  /* XXX */
1620                 int32_t *lp, tmp;                               /* XXX */
1621                                                                 /* XXX */
1622                 lp = (int32_t *)&dfs->fs_qbmask;                /* XXX */
1623                 tmp = lp[4];                                    /* XXX */
1624                 for (i = 4; i > 0; i--)                         /* XXX */
1625                         lp[i] = lp[i-1];                        /* XXX */
1626                 lp[0] = tmp;                                    /* XXX */
1627         }                                                       /* XXX */
1628 #if REV_ENDIAN_FS
1629         /* Note that dfs is already swapped so swap the filesize
1630         *  before writing
1631         */
1632         if (rev_endian) {
1633                 dfs->fs_maxfilesize = OSSwapInt64(mp->um_savedmaxfilesize);             /* XXX */
1634         } else {
1635 #endif /* REV_ENDIAN_FS */
1636                 dfs->fs_maxfilesize = mp->um_savedmaxfilesize;  /* XXX */
1637 #if REV_ENDIAN_FS
1638         }
1639 #endif /* REV_ENDIAN_FS */
1640         if (waitfor != MNT_WAIT)
1641                 buf_bawrite(bp);
1642         else if (error = (int)buf_bwrite(bp))
1643                 allerror = error;
1644
1645         return (allerror);
1646 }