]> git.saurik.com Git - apple/xnu.git/blob - bsd/ufs/ffs/ffs_vfsops.c
xnu-792.13.8.tar.gz
[apple/xnu.git] / bsd / ufs / ffs / ffs_vfsops.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
31 /*
32 * Copyright (c) 1989, 1991, 1993, 1994
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
64 */
65
66 #include <rev_endian_fs.h>
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/namei.h>
70 #include <sys/proc.h>
71 #include <sys/kauth.h>
72 #include <sys/kernel.h>
73 #include <sys/vnode_internal.h>
74 #include <sys/socket.h>
75 #include <sys/mount_internal.h>
76 #include <sys/mount.h>
77 #include <sys/buf.h>
78 #include <sys/mbuf.h>
79 #include <sys/file.h>
80 #include <sys/disk.h>
81 #include <sys/ioctl.h>
82 #include <sys/errno.h>
83 #include <sys/malloc.h>
84 #include <sys/ubc.h>
85 #include <sys/quota.h>
86
87 #include <miscfs/specfs/specdev.h>
88
89 #include <ufs/ufs/quota.h>
90 #include <ufs/ufs/ufsmount.h>
91 #include <ufs/ufs/inode.h>
92 #include <ufs/ufs/ufs_extern.h>
93
94 #include <ufs/ffs/fs.h>
95 #include <ufs/ffs/ffs_extern.h>
96 #if REV_ENDIAN_FS
97 #include <ufs/ufs/ufs_byte_order.h>
98 #include <libkern/OSByteOrder.h>
99 #endif /* REV_ENDIAN_FS */
100
101 int ffs_sbupdate(struct ufsmount *, int);
102
/*
 * VFS operations vector for UFS/FFS.  Slot order must match the layout
 * of struct vfsops; unused trailing slots are zero-filled.
 */
struct vfsops ufs_vfsops = {
	ffs_mount,		/* mount */
	ufs_start,		/* start */
	ffs_unmount,		/* unmount */
	ufs_root,		/* root */
	ufs_quotactl,		/* quotactl */
	ffs_vfs_getattr,	/* getattr */
	ffs_sync,		/* sync */
	ffs_vget,		/* vget */
	ffs_fhtovp,		/* fhtovp */
	ffs_vptofh,		/* vptofh */
	ffs_init,		/* init */
	ffs_sysctl,		/* sysctl */
	ffs_vfs_setattr,	/* setattr */
	{0}			/* reserved */
};
119
/* Global inode generation-number seed (defined elsewhere). */
extern u_long nextgennumber;

/*
 * Overlay used to access the two 32-bit halves of a 64-bit quantity
 * without shift/mask arithmetic.  _QUAD_HIGHWORD/_QUAD_LOWWORD select
 * the correct array slot for the host's byte order.
 */
union _qcvt {
	int64_t qcvt;
	int32_t val[2];
};
/* Replace the high-order 32 bits of q with h. */
#define SETHIGH(q, h) { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_HIGHWORD] = (h); \
	(q) = tmp.qcvt; \
}
/* Replace the low-order 32 bits of q with l. */
#define SETLOW(q, l) { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_LOWWORD] = (l); \
	(q) = tmp.qcvt; \
}
138
139 /*
140 * Called by main() when ufs is going to be mounted as root.
141 */
142 int
143 ffs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
144 {
145 struct proc *p = current_proc(); /* XXX */
146 int error;
147
148 /* Set asynchronous flag by default */
149 vfs_setflags(mp, MNT_ASYNC);
150
151 if (error = ffs_mountfs(rvp, mp, context))
152 return (error);
153
154 (void)ffs_statfs(mp, vfs_statfs(mp), NULL);
155
156 return (0);
157 }
158
/*
 * VFS Operations.
 *
 * mount system call
 *
 * Handles both a fresh mount (via ffs_mountfs) and MNT_UPDATE remounts:
 * downgrading read-write -> read-only (flush, mark clean, push the
 * superblock), reloading in-core data after fsck, and upgrading
 * read-only -> read-write.  Finally records the mount-point path in the
 * superblock and refreshes the cached statistics.
 */
int
ffs_mount(struct mount *mp, vnode_t devvp, __unused user_addr_t data, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct ufsmount *ump;
	register struct fs *fs;
	u_int size;		/* XXX unused */
	int error = 0, flags;
	mode_t accessmode;	/* XXX unused */
	int ronly;
	int reload = 0;

	/*
	 * If updating, check whether changing from read-write to
	 * read-only; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			/*
			 * Flush any dirty data.
			 */
			VFS_SYNC(mp, MNT_WAIT, context);
			/*
			 * Check for and optionally get rid of files open
			 * for writing.
			 */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (error = ffs_flushfiles(mp, flags, p))
				return (error);
			/* Mark clean and push the superblock before going r/o. */
			fs->fs_clean = 1;
			fs->fs_ronly = 1;
			if (error = ffs_sbupdate(ump, MNT_WAIT)) {
				/* Superblock write failed: undo clean/ro marks. */
				fs->fs_clean = 0;
				fs->fs_ronly = 0;
				return (error);
			}
		}
		/* save fs_ronly to later use */
		ronly = fs->fs_ronly;
		if ((mp->mnt_flag & MNT_RELOAD) || ronly)
			reload = 1;
		if ((reload) &&
		    (error = ffs_reload(mp, vfs_context_ucred(context), p)))
			return (error);
		/* replace the ronly after load */
		fs->fs_ronly = ronly;
		/*
		 * Do not update the file system if the user was in singleuser
		 * and then tries to mount -uw without fscking
		 */
		if (!fs->fs_clean && ronly) {
			printf("WARNING: trying to mount a dirty file system\n");
			if (issingleuser() && (mp->mnt_flag & MNT_ROOTFS)) {
				printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",fs->fs_fsmnt);
				/*
				 * Reset the readonly bit as reload might have
				 * modified this bit
				 */
				fs->fs_ronly = 1;
				return(EPERM);
			}
		}

		/* Upgrade r/o -> r/w: mark dirty until a clean unmount. */
		if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
			fs->fs_ronly = 0;
			fs->fs_clean = 0;
			(void) ffs_sbupdate(ump, MNT_WAIT);
		}
		/* Update without a device: nothing more to do. */
		if (devvp == 0) {
			return(0);
		}
	}
	if ((mp->mnt_flag & MNT_UPDATE) == 0)
		error = ffs_mountfs(devvp, mp, context);
	else {
		/* Update with a device: it must be the same device. */
		if (devvp != ump->um_devvp)
			error = EINVAL;	/* needs translation */
	}
	if (error) {
		return (error);
	}
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	/* Record the mount-point path in the superblock (NUL padded). */
	bzero(fs->fs_fsmnt , sizeof(fs->fs_fsmnt));
	strncpy(fs->fs_fsmnt, (caddr_t)mp->mnt_vfsstat.f_mntonname, sizeof(fs->fs_fsmnt) - 1);
	(void)ffs_statfs(mp, &mp->mnt_vfsstat, p);
	return (0);
}
256
257
/*
 * Context passed to ffs_reload_callback() through vnode_iterate().
 */
struct ffs_reload_cargs {
	struct vnode *devvp;	/* device vnode inode blocks are read from */
	kauth_cred_t cred;	/* caller credential (callback reads with NOCRED) */
	struct fs *fs;		/* in-core superblock of the fs being reloaded */
	struct proc *p;		/* calling process (not used by the callback) */
	int error;		/* set to the first buf_bread error, else 0 */
#if REV_ENDIAN_FS
	int rev_endian;		/* non-zero if on-disk data is byte-swapped */
#endif /* REV_ENDIAN_FS */
};
268
269
/*
 * Per-vnode worker for ffs_reload() (invoked via vnode_iterate):
 * invalidates the vnode's buffers and re-reads its on-disk inode into
 * the in-core inode (step 6 of the reload sequence).
 *
 * Returns VNODE_RETURNED_DONE (stops the iteration, with args->error
 * set) if the inode block cannot be read, otherwise VNODE_RETURNED.
 */
static int
ffs_reload_callback(struct vnode *vp, void *cargs)
{
	struct inode *ip;
	struct buf *bp;
	struct fs *fs;
	struct ffs_reload_cargs *args;

	args = (struct ffs_reload_cargs *)cargs;

	/*
	 * flush all the buffers associated with this node
	 */
	if (buf_invalidateblks(vp, 0, 0, 0))
		panic("ffs_reload: dirty2");

	/*
	 * Step 6: re-read inode data
	 */
	ip = VTOI(vp);
	fs = args->fs;

	if (args->error = (int)buf_bread(args->devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ip->i_number))),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		buf_brelse(bp);

		return (VNODE_RETURNED_DONE);
	}

#if REV_ENDIAN_FS
	/* Swap the on-disk dinode into host order while copying it in. */
	if (args->rev_endian) {
		byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) +
		    ino_to_fsbo(fs, ip->i_number)), ip);
	} else {
#endif /* REV_ENDIAN_FS */
	/* Copy the dinode for this inode out of the block's dinode array. */
	ip->i_din = *((struct dinode *)buf_dataptr(bp) +
	    ino_to_fsbo(fs, ip->i_number));
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */

	buf_brelse(bp);

	return (VNODE_RETURNED);
}
315
316
317 /*
318 * Reload all incore data for a filesystem (used after running fsck on
319 * the root filesystem and finding things to fix). The filesystem must
320 * be mounted read-only.
321 *
322 * Things to do to update the mount:
323 * 1) invalidate all cached meta-data.
324 * 2) re-read superblock from disk.
325 * 3) re-read summary information from disk.
326 * 4) invalidate all inactive vnodes.
327 * 5) invalidate all cached file data.
328 * 6) re-read inode data for all active vnodes.
329 */
330 ffs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p)
331 {
332 register struct vnode *devvp;
333 void *space;
334 struct buf *bp;
335 struct fs *fs, *newfs;
336 int i, blks, size, error;
337 u_int64_t maxfilesize; /* XXX */
338 int32_t *lp;
339 struct ffs_reload_cargs args;
340 #if REV_ENDIAN_FS
341 int rev_endian = (mountp->mnt_flag & MNT_REVEND);
342 #endif /* REV_ENDIAN_FS */
343
344 if ((mountp->mnt_flag & MNT_RDONLY) == 0)
345 return (EINVAL);
346 /*
347 * Step 1: invalidate all cached meta-data.
348 */
349 devvp = VFSTOUFS(mountp)->um_devvp;
350 if (buf_invalidateblks(devvp, 0, 0, 0))
351 panic("ffs_reload: dirty1");
352 /*
353 * Step 2: re-read superblock from disk.
354 */
355 size = vfs_devblocksize(mountp);
356
357 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)), SBSIZE, NOCRED,&bp)) {
358 buf_brelse(bp);
359 return (error);
360 }
361 newfs = (struct fs *)buf_dataptr(bp);
362 #if REV_ENDIAN_FS
363 if (rev_endian) {
364 byte_swap_sbin(newfs);
365 }
366 #endif /* REV_ENDIAN_FS */
367 if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
368 newfs->fs_bsize < sizeof(struct fs)) {
369 #if REV_ENDIAN_FS
370 if (rev_endian)
371 byte_swap_sbout(newfs);
372 #endif /* REV_ENDIAN_FS */
373
374 buf_brelse(bp);
375 return (EIO); /* XXX needs translation */
376 }
377 fs = VFSTOUFS(mountp)->um_fs;
378 /*
379 * Copy pointer fields back into superblock before copying in XXX
380 * new superblock. These should really be in the ufsmount. XXX
381 * Note that important parameters (eg fs_ncg) are unchanged.
382 */
383 newfs->fs_csp = fs->fs_csp;
384 newfs->fs_maxcluster = fs->fs_maxcluster;
385 newfs->fs_contigdirs = fs->fs_contigdirs;
386 bcopy(newfs, fs, (u_int)fs->fs_sbsize);
387 if (fs->fs_sbsize < SBSIZE)
388 buf_markinvalid(bp);
389 #if REV_ENDIAN_FS
390 if (rev_endian)
391 byte_swap_sbout(newfs);
392 #endif /* REV_ENDIAN_FS */
393 buf_brelse(bp);
394 mountp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
395 ffs_oldfscompat(fs);
396 maxfilesize = 0x100000000ULL; /* 4GB */
397 if (fs->fs_maxfilesize > maxfilesize) /* XXX */
398 fs->fs_maxfilesize = maxfilesize; /* XXX */
399 /*
400 * Step 3: re-read summary information from disk.
401 */
402 blks = howmany(fs->fs_cssize, fs->fs_fsize);
403 space = fs->fs_csp;
404 for (i = 0; i < blks; i += fs->fs_frag) {
405 size = fs->fs_bsize;
406 if (i + fs->fs_frag > blks)
407 size = (blks - i) * fs->fs_fsize;
408 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)), size,
409 NOCRED, &bp)) {
410 buf_brelse(bp);
411 return (error);
412 }
413 #if REV_ENDIAN_FS
414 if (rev_endian) {
415 /* csum swaps */
416 byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
417 }
418 #endif /* REV_ENDIAN_FS */
419 bcopy((char *)buf_dataptr(bp), space, (u_int)size);
420 #if REV_ENDIAN_FS
421 if (rev_endian) {
422 /* csum swaps */
423 byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
424 }
425 #endif /* REV_ENDIAN_FS */
426 space = (char *) space + size;
427 buf_brelse(bp);
428 }
429 /*
430 * We no longer know anything about clusters per cylinder group.
431 */
432 if (fs->fs_contigsumsize > 0) {
433 lp = fs->fs_maxcluster;
434 for (i = 0; i < fs->fs_ncg; i++)
435 *lp++ = fs->fs_contigsumsize;
436 }
437 #if REV_ENDIAN_FS
438 args.rev_endian = rev_endian;
439 #endif /* REV_ENDIAN_FS */
440 args.devvp = devvp;
441 args.cred = cred;
442 args.fs = fs;
443 args.p = p;
444 args.error = 0;
445 /*
446 * ffs_reload_callback will be called for each vnode
447 * hung off of this mount point that can't be recycled...
448 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
449 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
450 * properly referenced and unreferenced around the callback
451 */
452 vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, ffs_reload_callback, (void *)&args);
453
454 return (args.error);
455 }
456
/*
 * Common code for mount and mountroot
 *
 * Reads and validates the superblock from "devvp" (accepting an
 * opposite-endian filesystem when REV_ENDIAN_FS is configured),
 * allocates and fills the in-core ufsmount/fs structures, reads the
 * cylinder-group summary area, and attaches everything to "mp".  On
 * failure, releases any held buffer and frees the partial ufsmount.
 */
int
ffs_mountfs(devvp, mp, context)
	struct vnode *devvp;
	struct mount *mp;
	vfs_context_t context;
{
	struct ufsmount *ump;
	struct buf *bp;
	struct fs *fs;
	dev_t dev;
	struct buf *cgbp;
	struct cg *cgp;
	int32_t clustersumoff;
	void *space;
	int error, i, blks, ronly;
	u_int32_t size;
	int32_t *lp;
	kauth_cred_t cred;
	u_int64_t maxfilesize;					/* XXX */
	u_int dbsize = DEV_BSIZE;
#if REV_ENDIAN_FS
	int rev_endian=0;
#endif /* REV_ENDIAN_FS */
	dev = devvp->v_rdev;
	cred = vfs_context_ucred(context);

	ronly = vfs_isrdonly(mp);
	bp = NULL;
	ump = NULL;

	/* Advisory locking should be handled at the VFS layer */
	vfs_setlocklocal(mp);

	/* Obtain the actual device block size */
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&size, 0, context)) {
		error = ENXIO;
		goto out;
	}

	/* Read the superblock: SBSIZE bytes at device block SBOFF/size. */
	if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)),
	    SBSIZE, cred, &bp))
		goto out;
	fs = (struct fs *)buf_dataptr(bp);
#if REV_ENDIAN_FS
	/*
	 * Native-order sanity checks failed: try interpreting the
	 * superblock as byte-swapped, accepting it only if it passes the
	 * same checks after byte_swap_sbin() converts it in place.
	 */
	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
	    fs->fs_bsize < sizeof(struct fs)) {
		int magic = fs->fs_magic;

		byte_swap_ints(&magic, 1);
		if (magic != FS_MAGIC) {
			error = EINVAL;
			goto out;
		}
		byte_swap_sbin(fs);
		if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
		    fs->fs_bsize < sizeof(struct fs)) {
			byte_swap_sbout(fs);
			error = EINVAL;		/* XXX needs translation */
			goto out;
		}
		rev_endian=1;
	}
#endif /* REV_ENDIAN_FS */
	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
	    fs->fs_bsize < sizeof(struct fs)) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = EINVAL;		/* XXX needs translation */
		goto out;
	}


	/*
	 * Buffer cache does not handle multiple pages in a buf when
	 * invalidating incore buffer in pageout. There are no locks
	 * in the pageout path.  So there is a danger of loosing data when
	 * block allocation happens at the same time a pageout of buddy
	 * page occurs. incore() returns buf with both
	 * pages, this leads vnode-pageout to incorrectly flush of entire.
	 * buf. Till the low level ffs code is modified to deal with these
	 * do not mount any FS more than 4K size.
	 */
	/*
	 * Can't mount filesystems with a fragment size less than DIRBLKSIZ
	 */
	/*
	 * Don't mount dirty filesystems, except for the root filesystem
	 */
	if ((fs->fs_bsize > PAGE_SIZE) || (fs->fs_fsize < DIRBLKSIZ) ||
	    ((!(mp->mnt_flag & MNT_ROOTFS)) && (!fs->fs_clean))) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = ENOTSUP;
		goto out;
	}

	/* Let's figure out the devblock size the file system is with */
	/* the device block size = fragment size / number of sectors per frag */

	dbsize = fs->fs_fsize / NSPF(fs);
	if(dbsize <= 0 ) {
		kprintf("device blocksize computaion failed\n");
	} else {
		if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&dbsize,
		    FWRITE, context) != 0) {
			kprintf("failed to set device blocksize\n");
		}
		/* force the specfs to reread blocksize from size() */
		set_fsblocksize(devvp);
	}

	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = EROFS;          /* needs translation */
		goto out;
	}

	/* If we are not mounting read only, then check for overlap
	 * condition in cylinder group's free block map.
	 * If overlap exists, then force this into a read only mount
	 * to avoid further corruption. PR#2216969
	 */
	if (ronly == 0){
		if (error = (int)buf_bread (devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, 0))),
		    (int)fs->fs_cgsize, NOCRED, &cgbp)) {
			buf_brelse(cgbp);
			goto out;
		}
		cgp = (struct cg *)buf_dataptr(cgbp);
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgin(cgp,fs);
#endif /* REV_ENDIAN_FS */
		if (!cg_chkmagic(cgp)){
			/*
			 * NOTE(review): "error" still holds 0 from the
			 * successful buf_bread above, so this goto out
			 * returns 0 without attaching the mount — looks
			 * like a latent bug; confirm intended behavior.
			 */
#if REV_ENDIAN_FS
			if (rev_endian)
				byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
			buf_brelse(cgbp);
			goto out;
		}
		if (cgp->cg_clustersumoff != 0) {
			/* Check for overlap */
			clustersumoff = cgp->cg_freeoff +
				howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY);
			clustersumoff = roundup(clustersumoff, sizeof(long));
			if (cgp->cg_clustersumoff < clustersumoff) {
				/* Overlap exists */
				mp->mnt_flag |= MNT_RDONLY;
				ronly = 1;
			}
		}
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
		buf_brelse(cgbp);
	}

	/*
	 * Build the in-core mount state.  The superblock copy kept in
	 * ump->um_fs is taken from the (already host-order) buffer.
	 */
	ump = _MALLOC(sizeof *ump, M_UFSMNT, M_WAITOK);
	bzero((caddr_t)ump, sizeof *ump);
	ump->um_fs = _MALLOC((u_long)fs->fs_sbsize, M_UFSMNT,
	    M_WAITOK);
	bcopy((char *)buf_dataptr(bp), ump->um_fs, (u_int)fs->fs_sbsize);
	if (fs->fs_sbsize < SBSIZE)
		buf_markinvalid(bp);
#if REV_ENDIAN_FS
	/* Restore the buffer copy to disk order before releasing it. */
	if (rev_endian)
		byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
	buf_brelse(bp);
	bp = NULL;
	fs = ump->um_fs;
	fs->fs_ronly = ronly;
	/*
	 * One allocation holds the csum area, the per-cg maxcluster
	 * array (if clustering), and the per-cg contigdirs bytes.
	 */
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(u_int8_t);
	space = _MALLOC((u_long)size, M_UFSMNT, M_WAITOK);
	fs->fs_csp = space;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
		    size, cred, &bp)) {
			_FREE(fs->fs_csp, M_UFSMNT);
			goto out;
		}
		bcopy((char *)buf_dataptr(bp), space, (u_int)size);
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_ints((int *) space, size / sizeof(int));
#endif /* REV_ENDIAN_FS */
		space = (char *)space + size;
		buf_brelse(bp);
		bp = NULL;
	}
	if (fs->fs_contigsumsize > 0) {
		fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = lp;
	}
	size = fs->fs_ncg * sizeof(u_int8_t);
	fs->fs_contigdirs = (u_int8_t *)space;
	space = (u_int8_t *)space + size;
	bzero(fs->fs_contigdirs, size);
	/* XXX Compatibility for old filesystems */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;
	/* XXX End of compatibility */
	mp->mnt_data = (qaddr_t)ump;
	mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
	mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
	/* XXX warning hardcoded max symlen and not "mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;" */
	mp->mnt_maxsymlinklen = 60;
#if REV_ENDIAN_FS
	if (rev_endian)
		mp->mnt_flag |= MNT_REVEND;
#endif /* REV_ENDIAN_FS */
	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_nindir = fs->fs_nindir;
	ump->um_bptrtodb = fs->fs_fsbtodb;
	ump->um_seqinc = fs->fs_frag;
	for (i = 0; i < MAXQUOTAS; i++)
		dqfileinit(&ump->um_qfiles[i]);
	ffs_oldfscompat(fs);
	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
	maxfilesize = 0x100000000ULL;	/* 4GB */
#if 0
	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
#endif /* 0 */
	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
		fs->fs_maxfilesize = maxfilesize;		/* XXX */
	if (ronly == 0) {
		/* Writable mount: mark dirty until a clean unmount. */
		fs->fs_clean = 0;
		(void) ffs_sbupdate(ump, MNT_WAIT);
	}
	return (0);
out:
	if (bp)
		buf_brelse(bp);
	if (ump) {
		_FREE(ump->um_fs, M_UFSMNT);
		_FREE(ump, M_UFSMNT);
	}
	return (error);
}
722
723 /*
724 * Sanity checks for old file systems.
725 *
726 * XXX - goes away some day.
727 */
728 ffs_oldfscompat(fs)
729 struct fs *fs;
730 {
731 int i;
732
733 fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */
734 fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */
735 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */
736 fs->fs_nrpos = 8; /* XXX */
737 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
738 u_int64_t sizepb = fs->fs_bsize; /* XXX */
739 /* XXX */
740 fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */
741 for (i = 0; i < NIADDR; i++) { /* XXX */
742 sizepb *= NINDIR(fs); /* XXX */
743 fs->fs_maxfilesize += sizepb; /* XXX */
744 } /* XXX */
745 fs->fs_qbmask = ~fs->fs_bmask; /* XXX */
746 fs->fs_qfmask = ~fs->fs_fmask; /* XXX */
747 } /* XXX */
748 return (0);
749 }
750
/*
 * unmount system call
 *
 * Flushes all files (forcibly when MNT_FORCE is set), marks the on-disk
 * superblock clean if the mount was writable, then frees the summary
 * area, the in-core superblock, and the ufsmount.
 */
int
ffs_unmount(mp, mntflags, context)
	struct mount *mp;
	int mntflags;
	vfs_context_t context;
{
	struct proc *p = vfs_context_proc(context);
	register struct ufsmount *ump;
	register struct fs *fs;
	int error, flags;
	int force;

	flags = 0;
	force = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		force = 1;
	}
	/* A flush failure aborts the unmount unless it is forced. */
	if ( (error = ffs_flushfiles(mp, flags, p)) && !force )
		return (error);
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;

	if (fs->fs_ronly == 0) {
		/* Writable mount: record a clean shutdown on disk. */
		fs->fs_clean = 1;
		if (error = ffs_sbupdate(ump, MNT_WAIT)) {
			/* Superblock write failed; proceed anyway (see below). */
			fs->fs_clean = 0;
#ifdef notyet
		/* we can atleast cleanup ; as the media could be WP */
		/* & during mount, we do not check for write failures */
		/* FIXME LATER : the Correct fix would be to have */
		/* mount detect the WP media and downgrade to readonly mount */
		/* For now, here it is */
			return (error);
#endif /* notyet */
		}
	}
	/* Tear down the in-core state built by ffs_mountfs(). */
	_FREE(fs->fs_csp, M_UFSMNT);
	_FREE(fs, M_UFSMNT);
	_FREE(ump, M_UFSMNT);

	return (0);
}
797
798 /*
799 * Flush out all the files in a filesystem.
800 */
801 ffs_flushfiles(mp, flags, p)
802 register struct mount *mp;
803 int flags;
804 struct proc *p;
805 {
806 register struct ufsmount *ump;
807 int i, error;
808
809 ump = VFSTOUFS(mp);
810
811 #if QUOTA
812 /*
813 * NOTE: The open quota files have an indirect reference
814 * on the root directory vnode. We must account for this
815 * extra reference when doing the intial vflush.
816 */
817 if (mp->mnt_flag & MNT_QUOTA) {
818 struct vnode *rootvp = NULLVP;
819 int quotafilecnt = 0;
820
821 /* Find out how many quota files we have open. */
822 for (i = 0; i < MAXQUOTAS; i++) {
823 if (ump->um_qfiles[i].qf_vp != NULLVP)
824 ++quotafilecnt;
825 }
826
827 /*
828 * Check if the root vnode is in our inode hash
829 * (so we can skip over it).
830 */
831 rootvp = ufs_ihashget(ump->um_dev, ROOTINO);
832
833 error = vflush(mp, rootvp, SKIPSYSTEM|flags);
834
835 if (rootvp) {
836 /*
837 * See if there are additional references on the
838 * root vp besides the ones obtained from the open
839 * quota files and the hfs_chashget call above.
840 */
841 if ((error == 0) &&
842 (rootvp->v_usecount > (1 + quotafilecnt))) {
843 error = EBUSY; /* root dir is still open */
844 }
845 vnode_put(rootvp);
846 }
847 if (error && (flags & FORCECLOSE) == 0)
848 return (error);
849
850 for (i = 0; i < MAXQUOTAS; i++) {
851 if (ump->um_qfiles[i].qf_vp == NULLVP)
852 continue;
853 quotaoff(mp, i);
854 }
855 /*
856 * Here we fall through to vflush again to ensure
857 * that we have gotten rid of all the system vnodes.
858 */
859 }
860 #endif
861 error = vflush(mp, NULLVP, SKIPSWAP|flags);
862 error = vflush(mp, NULLVP, flags);
863 return (error);
864 }
865
866 /*
867 * Get file system statistics.
868 */
869 int
870 ffs_statfs(mp, sbp, context)
871 struct mount *mp;
872 register struct vfsstatfs *sbp;
873 vfs_context_t context;
874 {
875 register struct ufsmount *ump;
876 register struct fs *fs;
877
878 ump = VFSTOUFS(mp);
879 fs = ump->um_fs;
880 if (fs->fs_magic != FS_MAGIC)
881 panic("ffs_statfs");
882 sbp->f_bsize = fs->fs_fsize;
883 sbp->f_iosize = fs->fs_bsize;
884 sbp->f_blocks = (uint64_t)((unsigned long)fs->fs_dsize);
885 sbp->f_bfree = (uint64_t) ((unsigned long)(fs->fs_cstotal.cs_nbfree * fs->fs_frag +
886 fs->fs_cstotal.cs_nffree));
887 sbp->f_bavail = (uint64_t) ((unsigned long)freespace(fs, fs->fs_minfree));
888 sbp->f_files = (uint64_t) ((unsigned long)(fs->fs_ncg * fs->fs_ipg - ROOTINO));
889 sbp->f_ffree = (uint64_t) ((unsigned long)fs->fs_cstotal.cs_nifree);
890 return (0);
891 }
892
/*
 * Get file system attributes (VFS getattr entry point).
 *
 * Returns block/inode statistics from the in-core superblock, the fsid,
 * the volume name read from the on-disk UFS label, the volume
 * capability flags, and the supported attribute sets.
 */
int
ffs_vfs_getattr(mp, fsap, context)
	struct mount *mp;
	struct vfs_attr *fsap;
	vfs_context_t context;
{
	struct ufsmount *ump;
	struct fs *fs;
	kauth_cred_t cred;
	struct vnode *devvp;
	struct buf *bp;
	struct ufslabel *ulp;
	char *offset;
	int bs, error, length;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	cred = vfs_context_ucred(context);

	/* Statistics straight from the superblock (same values as ffs_statfs). */
	VFSATTR_RETURN(fsap, f_bsize, fs->fs_fsize);
	VFSATTR_RETURN(fsap, f_iosize, fs->fs_bsize);
	VFSATTR_RETURN(fsap, f_blocks, (uint64_t)((unsigned long)fs->fs_dsize));
	VFSATTR_RETURN(fsap, f_bfree, (uint64_t)((unsigned long)
	    (fs->fs_cstotal.cs_nbfree * fs->fs_frag +
	    fs->fs_cstotal.cs_nffree)));
	VFSATTR_RETURN(fsap, f_bavail, (uint64_t)((unsigned long)freespace(fs,
	    fs->fs_minfree)));
	VFSATTR_RETURN(fsap, f_files, (uint64_t)((unsigned long)
	    (fs->fs_ncg * fs->fs_ipg - ROOTINO)));
	VFSATTR_RETURN(fsap, f_ffree, (uint64_t)((unsigned long)
	    fs->fs_cstotal.cs_nifree));

	if (VFSATTR_IS_ACTIVE(fsap, f_fsid)) {
		fsap->f_fsid.val[0] = mp->mnt_vfsstat.f_fsid.val[0];
		fsap->f_fsid.val[1] = mp->mnt_vfsstat.f_fsid.val[1];
		VFSATTR_SET_SUPPORTED(fsap, f_fsid);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		/* Read the device sector(s) holding the UFS volume label. */
		devvp = ump->um_devvp;
		bs = vfs_devblocksize(mp);

		if (error = (int)buf_meta_bread(devvp,
		    (daddr64_t)(UFS_LABEL_OFFSET / bs),
		    MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
			if (bp)
				buf_brelse(bp);
			return (error);
		}

		/*
		 * Since the disklabel is read directly by older user space
		 * code, make sure this buffer won't remain in the cache when
		 * we release it.
		 */
		buf_setflags(bp, B_NOCACHE);

		/* Locate the label within the (possibly larger) sector read. */
		offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
		ulp = (struct ufslabel *)offset;

		/* Only report a name if the label validates. */
		if (ufs_label_check(ulp)) {
			length = ulp->ul_namelen;
#if REV_ENDIAN_FS
			/* ul_namelen is stored in on-disk byte order. */
			if (mp->mnt_flag & MNT_REVEND)
				length = OSSwapInt16(length);
#endif
			if (length > 0 && length <= UFS_MAX_LABEL_NAME) {
				bcopy(ulp->ul_name, fsap->f_vol_name, length);
				fsap->f_vol_name[UFS_MAX_LABEL_NAME - 1] = '\0';
				fsap->f_vol_name[length] = '\0';
			}
		}

		buf_brelse(bp);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		/* Capabilities this filesystem actually implements: */
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] =
		    VOL_CAP_FMT_SYMBOLICLINKS |
		    VOL_CAP_FMT_HARDLINKS |
		    VOL_CAP_FMT_SPARSE_FILES |
		    VOL_CAP_FMT_CASE_SENSITIVE |
		    VOL_CAP_FMT_CASE_PRESERVING |
		    VOL_CAP_FMT_FAST_STATFS ;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES]
		    = VOL_CAP_INT_NFSEXPORT |
		    VOL_CAP_INT_VOL_RENAME |
		    VOL_CAP_INT_ADVLOCK |
		    VOL_CAP_INT_FLOCK;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1]
		    = 0;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2]
		    = 0;

		/* Capabilities we know about: */
		fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] =
		    VOL_CAP_FMT_PERSISTENTOBJECTIDS |
		    VOL_CAP_FMT_SYMBOLICLINKS |
		    VOL_CAP_FMT_HARDLINKS |
		    VOL_CAP_FMT_JOURNAL |
		    VOL_CAP_FMT_JOURNAL_ACTIVE |
		    VOL_CAP_FMT_NO_ROOT_TIMES |
		    VOL_CAP_FMT_SPARSE_FILES |
		    VOL_CAP_FMT_ZERO_RUNS |
		    VOL_CAP_FMT_CASE_SENSITIVE |
		    VOL_CAP_FMT_CASE_PRESERVING |
		    VOL_CAP_FMT_FAST_STATFS |
		    VOL_CAP_FMT_2TB_FILESIZE;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] =
		    VOL_CAP_INT_SEARCHFS |
		    VOL_CAP_INT_ATTRLIST |
		    VOL_CAP_INT_NFSEXPORT |
		    VOL_CAP_INT_READDIRATTR |
		    VOL_CAP_INT_EXCHANGEDATA |
		    VOL_CAP_INT_COPYFILE |
		    VOL_CAP_INT_ALLOCATE |
		    VOL_CAP_INT_VOL_RENAME |
		    VOL_CAP_INT_ADVLOCK |
		    VOL_CAP_INT_FLOCK ;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0;

		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		/* Only volume name/capabilities/attributes are queryable. */
		fsap->f_attributes.validattr.commonattr = 0;
		fsap->f_attributes.validattr.volattr =
		    ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
		fsap->f_attributes.validattr.dirattr = 0;
		fsap->f_attributes.validattr.fileattr = 0;
		fsap->f_attributes.validattr.forkattr = 0;

		fsap->f_attributes.nativeattr.commonattr = 0;
		fsap->f_attributes.nativeattr.volattr =
		    ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
		fsap->f_attributes.nativeattr.dirattr = 0;
		fsap->f_attributes.nativeattr.fileattr = 0;
		fsap->f_attributes.nativeattr.forkattr = 0;

		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}

	return (0);
}
1039
1040
1041 int
1042 ffs_vfs_setattr(mp, fsap, context)
1043 struct mount *mp;
1044 struct vfs_attr *fsap;
1045 vfs_context_t context;
1046 {
1047 struct ufsmount *ump;
1048 struct vnode *devvp;
1049 struct buf *bp;
1050 struct ufslabel *ulp;
1051 kauth_cred_t cred;
1052 char *offset;
1053 int bs, error;
1054
1055
1056 ump = VFSTOUFS(mp);
1057 cred = vfs_context_ucred(context);
1058
1059 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
1060 devvp = ump->um_devvp;
1061 bs = vfs_devblocksize(mp);
1062 if (error = buf_meta_bread(devvp,
1063 (daddr64_t)(UFS_LABEL_OFFSET / bs),
1064 MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
1065 if (bp)
1066 buf_brelse(bp);
1067 return (error);
1068 }
1069
1070 /*
1071 * Since the disklabel is read directly by older user space
1072 * code, make sure this buffer won't remain in the cache when
1073 * we release it.
1074 */
1075 buf_setflags(bp, B_NOCACHE);
1076
1077 /* Validate the label structure; init if not valid */
1078 offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
1079 ulp = (struct ufslabel *)offset;
1080 if (!ufs_label_check(ulp))
1081 ufs_label_init(ulp);
1082
1083 /* Copy new name over existing name */
1084 ulp->ul_namelen = strlen(fsap->f_vol_name);
1085 bcopy(fsap->f_vol_name, ulp->ul_name, ulp->ul_namelen);
1086 ulp->ul_name[UFS_MAX_LABEL_NAME - 1] = '\0';
1087 ulp->ul_name[ulp->ul_namelen] = '\0';
1088
1089 #if REV_ENDIAN_FS
1090 if (mp->mnt_flag & MNT_REVEND)
1091 ulp->ul_namelen = OSSwapInt16(ulp->ul_namelen);
1092 #endif
1093
1094 /* Update the checksum */
1095 ulp->ul_checksum = 0;
1096 ulp->ul_checksum = ul_cksum(ulp, sizeof(*ulp));
1097
1098 /* Write the label back to disk */
1099 buf_bwrite(bp);
1100 bp = NULL;
1101
1102 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
1103 }
1104
1105 return (0);
1106 }
/*
 * Argument/result bundle threaded through vnode_iterate() to
 * ffs_sync_callback() during ffs_sync().
 */
struct ffs_sync_cargs {
	vfs_context_t context;	/* caller's VFS context, passed to VNOP_FSYNC */
	int waitfor;		/* sync wait mode handed to VNOP_FSYNC */
	int error;		/* out: last VNOP_FSYNC error observed */
};
1112
1113
1114 static int
1115 ffs_sync_callback(struct vnode *vp, void *cargs)
1116 {
1117 struct inode *ip;
1118 struct ffs_sync_cargs *args;
1119 int error;
1120
1121 args = (struct ffs_sync_cargs *)cargs;
1122
1123 ip = VTOI(vp);
1124
1125 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) || vnode_hasdirtyblks(vp)) {
1126 error = VNOP_FSYNC(vp, args->waitfor, args->context);
1127
1128 if (error)
1129 args->error = error;
1130
1131 }
1132 return (VNODE_RETURNED);
1133 }
1134
1135 /*
1136 * Go through the disk queues to initiate sandbagged IO;
1137 * go through the inodes to write those that have been modified;
1138 * initiate the writing of the super block if it has been modified.
1139 *
1140 * Note: we are always called with the filesystem marked `MPBUSY'.
1141 */
1142 int
1143 ffs_sync(mp, waitfor, context)
1144 struct mount *mp;
1145 int waitfor;
1146 vfs_context_t context;
1147 {
1148 struct vnode *nvp, *vp;
1149 struct ufsmount *ump = VFSTOUFS(mp);
1150 struct fs *fs;
1151 struct timeval tv;
1152 int error, allerror = 0;
1153 struct ffs_sync_cargs args;
1154
1155 fs = ump->um_fs;
1156 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */
1157 printf("fs = %s\n", fs->fs_fsmnt);
1158 panic("update: rofs mod");
1159 }
1160 /*
1161 * Write back each (modified) inode.
1162 */
1163 args.context = context;
1164 args.waitfor = waitfor;
1165 args.error = 0;
1166 /*
1167 * ffs_sync_callback will be called for each vnode
1168 * hung off of this mount point... the vnode will be
1169 * properly referenced and unreferenced around the callback
1170 */
1171 vnode_iterate(mp, 0, ffs_sync_callback, (void *)&args);
1172
1173 if (args.error)
1174 allerror = args.error;
1175
1176 /*
1177 * Force stale file system control information to be flushed.
1178 */
1179 if (error = VNOP_FSYNC(ump->um_devvp, waitfor, context))
1180 allerror = error;
1181 #if QUOTA
1182 qsync(mp);
1183 #endif
1184 /*
1185 * Write back modified superblock.
1186 */
1187 if (fs->fs_fmod != 0) {
1188 fs->fs_fmod = 0;
1189 microtime(&tv);
1190 fs->fs_time = tv.tv_sec;
1191 if (error = ffs_sbupdate(ump, waitfor))
1192 allerror = error;
1193 }
1194 return (allerror);
1195 }
1196
1197 /*
1198 * Look up a FFS dinode number to find its incore vnode, otherwise read it
1199 * in from disk. If it is in core, wait for the lock bit to clear, then
1200 * return the inode locked. Detection and handling of mount points must be
1201 * done by the calling routine.
1202 */
1203 int
1204 ffs_vget(mp, ino, vpp, context)
1205 mount_t mp;
1206 ino64_t ino;
1207 vnode_t *vpp;
1208 vfs_context_t context;
1209 {
1210 return(ffs_vget_internal(mp, (ino_t)ino, vpp, NULL, NULL, 0, 0));
1211 }
1212
1213
/*
 * Common back end for ffs_vget() and file-handle lookups: return the
 * incore vnode for (mp, ino), or read the on-disk dinode and build a
 * fresh vnode for it.
 *
 * dvp/cnp are optional lookup hints used for name-cache entry and
 * vnode creation; mode, when non-zero, overrides the on-disk i_mode
 * for determining the vnode type; fhwanted is non-zero for NFS
 * file-handle lookups, in which case a free (VNON) inode yields
 * ESTALE rather than ENOENT.
 *
 * Returns 0 with *vpp set on success, or an errno with *vpp NULL.
 */
int
ffs_vget_internal(mp, ino, vpp, dvp, cnp, mode, fhwanted)
	mount_t mp;
	ino_t ino;
	vnode_t *vpp;
	vnode_t dvp;
	struct componentname *cnp;
	int mode;
	int fhwanted;
{
	struct proc *p = current_proc();	/* XXX */
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct vnode_fsparam vfsp;
	struct timeval tv;
	enum vtype vtype;
	dev_t dev;
	int i, type, error = 0;

	*vpp = NULL;
	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
#if 0
	/* Check for unmount in progress */
	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		return (EPERM);
	}
#endif
	/*
	 * Allocate a new inode... do it before we check the
	 * cache, because the MALLOC_ZONE may block
	 */
	type = M_FFSNODE;
	MALLOC_ZONE(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);

	/*
	 * check in the inode hash
	 */
	if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
		/*
		 * found it... get rid of the allocation
		 * that we didn't need and return
		 * the 'found' vnode
		 */
		FREE_ZONE(ip, sizeof(struct inode), type);
		vp = *vpp;
		return (0);
	}
	bzero((caddr_t)ip, sizeof(struct inode));
	/*
	 * lock the inode
	 */
//	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
//	lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct slock *)0, p);

	ip->i_fs = fs = ump->um_fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#if QUOTA
	/* No quota structures attached yet; resolved lazily elsewhere. */
	for (i = 0; i < MAXQUOTAS; i++)
		ip->i_dquot[i] = NODQUOT;
#endif
	/* Mark construction in progress; racing lookups block on IN_ALLOC
	 * and are woken via the IN_WALLOC wakeups below. */
	SET(ip->i_flag, IN_ALLOC);
	/*
	 * Put it onto its hash chain locked so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ufs_ihashins(ip);

	/* Read in the disk contents for the inode, copy into the inode. */
	if (error = (int)buf_bread(ump->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ino))),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		buf_brelse(bp);
		goto errout;
	}
#if REV_ENDIAN_FS
	/* Opposite-endian filesystems are byte-swapped as the dinode is
	 * copied in; otherwise a straight structure copy suffices. */
	if (mp->mnt_flag & MNT_REVEND) {
		byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino)),ip);
	} else {
		ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
	}
#else
	ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
#endif /* REV_ENDIAN_FS */
	buf_brelse(bp);

	/* mode == 0 means "use the file type recorded on disk". */
	if (mode == 0)
		vtype = IFTOVT(ip->i_mode);
	else
		vtype = IFTOVT(mode);

	/* VNON: the inode is unallocated on disk. */
	if (vtype == VNON) {
		if (fhwanted) {
			/* NFS is in play */
			error = ESTALE;
			goto errout;
		} else {
			error = ENOENT;
			goto errout;
		}
	}

	vfsp.vnfs_mp = mp;
	vfsp.vnfs_vtype = vtype;
	vfsp.vnfs_str = "ufs";
	vfsp.vnfs_dvp = dvp;
	vfsp.vnfs_fsnode = ip;
	vfsp.vnfs_cnp = cnp;

	if (mode == 0)
		vfsp.vnfs_filesize = ip->i_din.di_size;
	else
		vfsp.vnfs_filesize = 0;

	/* Select the vnode operations vector by file type. */
	if (vtype == VFIFO )
		vfsp.vnfs_vops = FFS_FIFOOPS;
	else if (vtype == VBLK || vtype == VCHR)
		vfsp.vnfs_vops = ffs_specop_p;
	else
		vfsp.vnfs_vops = ffs_vnodeop_p;

	if (vtype == VBLK || vtype == VCHR)
		vfsp.vnfs_rdev = ip->i_rdev;
	else
		vfsp.vnfs_rdev = 0;

	/* Only enter the name cache when the lookup asked for it. */
	if (dvp && cnp && (cnp->cn_flags & MAKEENTRY))
		vfsp.vnfs_flags = 0;
	else
		vfsp.vnfs_flags = VNFS_NOCACHE;

	/*
	 * Tag root directory
	 */
	vfsp.vnfs_markroot = (ip->i_number == ROOTINO);
	vfsp.vnfs_marksystem = 0;

	if ((error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp)))
		goto errout;

	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */
	ip->i_devvp = ump->um_devvp;
	ip->i_vnode = vp;

	vnode_ref(ip->i_devvp);
	vnode_addfsref(vp);
	vnode_settag(vp, VT_UFS);

	/*
	 * Initialize modrev times
	 */
	microtime(&tv);
	SETHIGH(ip->i_modrev, tv.tv_sec);
	SETLOW(ip->i_modrev, tv.tv_usec * 4294);

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)tv.tv_sec)
			nextgennumber = tv.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IN_MODIFIED;
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
	}						/* XXX */
	*vpp = vp;

	/* Construction complete; wake any lookups blocked on this inode. */
	CLR(ip->i_flag, IN_ALLOC);

	if (ISSET(ip->i_flag, IN_WALLOC))
		wakeup(ip);

	return (0);

errout:
	/* Undo the hash insertion and free the half-built inode. */
	ufs_ihashrem(ip);

	if (ISSET(ip->i_flag, IN_WALLOC))
		wakeup(ip);
	FREE_ZONE(ip, sizeof(struct inode), type);

	return (error);
}
1413
1414 /*
1415 * File handle to vnode
1416 *
1417 * Have to be really careful about stale file handles:
1418 * - check that the inode number is valid
1419 * - call vget to get the locked inode
1420 * - check for an unallocated inode (i_mode == 0)
1421 */
1422 int
1423 ffs_fhtovp(mp, fhlen, fhp, vpp, context)
1424 register struct mount *mp;
1425 int fhlen;
1426 unsigned char *fhp;
1427 struct vnode **vpp;
1428 vfs_context_t context;
1429 {
1430 register struct ufid *ufhp;
1431 register struct inode *ip;
1432 struct vnode *nvp;
1433 struct fs *fs;
1434 int error;
1435 ino_t ino;
1436
1437 if (fhlen < (int)sizeof(struct ufid))
1438 return (EINVAL);
1439 ufhp = (struct ufid *)fhp;
1440 fs = VFSTOUFS(mp)->um_fs;
1441 ino = ntohl(ufhp->ufid_ino);
1442 if (ino < ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg)
1443 return (ESTALE);
1444 error = ffs_vget_internal(mp, ino, &nvp, NULL, NULL, 0, 1);
1445 if (error) {
1446 *vpp = NULLVP;
1447 return (error);
1448 }
1449 ip = VTOI(nvp);
1450 if (ip->i_mode == 0 || ip->i_gen != ntohl(ufhp->ufid_gen)) {
1451 vnode_put(nvp);
1452 *vpp = NULLVP;
1453 return (ESTALE);
1454 }
1455 *vpp = nvp;
1456 return (0);
1457 }
1458
1459 /*
1460 * Vnode pointer to File handle
1461 */
1462 /* ARGSUSED */
1463 int
1464 ffs_vptofh(vp, fhlenp, fhp, context)
1465 struct vnode *vp;
1466 int *fhlenp;
1467 unsigned char *fhp;
1468 vfs_context_t context;
1469 {
1470 register struct inode *ip;
1471 register struct ufid *ufhp;
1472
1473 if (*fhlenp < (int)sizeof(struct ufid))
1474 return (EOVERFLOW);
1475 ip = VTOI(vp);
1476 ufhp = (struct ufid *)fhp;
1477 ufhp->ufid_ino = htonl(ip->i_number);
1478 ufhp->ufid_gen = htonl(ip->i_gen);
1479 *fhlenp = sizeof(struct ufid);
1480 return (0);
1481 }
1482
/*
 * Initialize the filesystem; FFS has no setup of its own, so defer
 * entirely to the generic UFS initialization.
 */
int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int result;

	result = ufs_init(vfsp);
	return (result);
}
1493
1494 /*
1495 * fast filesystem related variables.
1496 */
1497 ffs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
1498 user_addr_t newp, size_t newlen, vfs_context_t context)
1499 {
1500 extern int doclusterread, doclusterwrite, doreallocblks, doasyncfree;
1501
1502 /* all sysctl names at this level are terminal */
1503 if (namelen != 1)
1504 return (ENOTDIR); /* overloaded */
1505
1506 switch (name[0]) {
1507 case FFS_CLUSTERREAD:
1508 return (sysctl_int(oldp, oldlenp, newp, newlen,
1509 &doclusterread));
1510 case FFS_CLUSTERWRITE:
1511 return (sysctl_int(oldp, oldlenp, newp, newlen,
1512 &doclusterwrite));
1513 case FFS_REALLOCBLKS:
1514 return (sysctl_int(oldp, oldlenp, newp, newlen,
1515 &doreallocblks));
1516 case FFS_ASYNCFREE:
1517 return (sysctl_int(oldp, oldlenp, newp, newlen, &doasyncfree));
1518 default:
1519 return (ENOTSUP);
1520 }
1521 /* NOTREACHED */
1522 }
1523
/*
 * Write a superblock and associated information back to disk.
 *
 * First flushes the cylinder-group summary (csum) blocks, then the
 * superblock itself — but only if the summary writes all succeeded,
 * so a failed update never leaves a superblock that claims to be
 * clean.  waitfor selects synchronous (MNT_WAIT) vs. async writes.
 * Returns 0 or the last write error.
 */
int
ffs_sbupdate(mp, waitfor)
	struct ufsmount *mp;
	int waitfor;
{
	register struct fs *dfs, *fs = mp->um_fs;
	register struct buf *bp;
	int blks;
	void *space;
	int i, size, error, allerror = 0;
	int devBlockSize=0;
#if REV_ENDIAN_FS
	int rev_endian=(mp->um_mountp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	/*
	 * First write back the summary information.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		/* The final chunk may be shorter than a full fs block. */
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
			size, 0, 0, BLK_META);
		bcopy(space, (char *)buf_dataptr(bp), (u_int)size);
#if REV_ENDIAN_FS
		/* Summary data is stored opposite-endian on MNT_REVEND mounts. */
		if (rev_endian) {
			byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
		}
#endif /* REV_ENDIAN_FS */
		space = (char *)space + size;
		/* MNT_WAIT: synchronous write, collect errors; else async. */
		if (waitfor != MNT_WAIT)
			buf_bawrite(bp);
		else if (error = (int)buf_bwrite(bp))
			allerror = error;
	}
	/*
	 * Now write back the superblock itself. If any errors occurred
	 * up to this point, then fail so that the superblock avoids
	 * being written out as clean.
	 */
	if (allerror)
		return (allerror);
	devBlockSize = vfs_devblocksize(mp->um_mountp);

	bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)(SBOFF/devBlockSize)), (int)fs->fs_sbsize, 0, 0, BLK_META);
	bcopy((caddr_t)fs, (char *)buf_dataptr(bp), (u_int)fs->fs_sbsize);
	/* Restore compatibility to old file systems. XXX */
	dfs = (struct fs *)buf_dataptr(bp);	/* XXX */
	if (fs->fs_postblformat == FS_42POSTBLFMT)	/* XXX */
		dfs->fs_nrpos = -1;	/* XXX */
#if REV_ENDIAN_FS
	/*
	 * Swapping bytes here ; so that in case
	 * of inode format < FS_44INODEFMT appropriate
	 * fields get moved
	 */
	if (rev_endian) {
		byte_swap_sbout((struct fs *)buf_dataptr(bp));
	}
#endif /* REV_ENDIAN_FS */
	/* Pre-4.4 inode format: rotate the fields at fs_qbmask to the
	 * old on-disk layout before writing.  XXX */
	if (fs->fs_inodefmt < FS_44INODEFMT) {	/* XXX */
		int32_t *lp, tmp;	/* XXX */
			/* XXX */
		lp = (int32_t *)&dfs->fs_qbmask;	/* XXX */
		tmp = lp[4];	/* XXX */
		for (i = 4; i > 0; i--)	/* XXX */
			lp[i] = lp[i-1];	/* XXX */
		lp[0] = tmp;	/* XXX */
	}	/* XXX */
#if REV_ENDIAN_FS
	/* Note that dfs is already swapped so swap the filesize
	 * before writing
	 */
	if (rev_endian) {
		dfs->fs_maxfilesize = OSSwapInt64(mp->um_savedmaxfilesize);	/* XXX */
	} else {
#endif /* REV_ENDIAN_FS */
		dfs->fs_maxfilesize = mp->um_savedmaxfilesize;	/* XXX */
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */
	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else if (error = (int)buf_bwrite(bp))
		allerror = error;

	return (allerror);
}