]> git.saurik.com Git - apple/xnu.git/blob - bsd/ufs/ffs/ffs_vfsops.c
xnu-792.6.61.tar.gz
[apple/xnu.git] / bsd / ufs / ffs / ffs_vfsops.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1989, 1991, 1993, 1994
25 * The Regents of the University of California. All rights reserved.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 * 1. Redistributions of source code must retain the above copyright
31 * notice, this list of conditions and the following disclaimer.
32 * 2. Redistributions in binary form must reproduce the above copyright
33 * notice, this list of conditions and the following disclaimer in the
34 * documentation and/or other materials provided with the distribution.
35 * 3. All advertising materials mentioning features or use of this software
36 * must display the following acknowledgement:
37 * This product includes software developed by the University of
38 * California, Berkeley and its contributors.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
56 */
57
58 #include <rev_endian_fs.h>
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/namei.h>
62 #include <sys/proc.h>
63 #include <sys/kauth.h>
64 #include <sys/kernel.h>
65 #include <sys/vnode_internal.h>
66 #include <sys/socket.h>
67 #include <sys/mount_internal.h>
68 #include <sys/mount.h>
69 #include <sys/buf.h>
70 #include <sys/mbuf.h>
71 #include <sys/file.h>
72 #include <sys/disk.h>
73 #include <sys/ioctl.h>
74 #include <sys/errno.h>
75 #include <sys/malloc.h>
76 #include <sys/ubc.h>
77 #include <sys/quota.h>
78
79 #include <miscfs/specfs/specdev.h>
80
81 #include <ufs/ufs/quota.h>
82 #include <ufs/ufs/ufsmount.h>
83 #include <ufs/ufs/inode.h>
84 #include <ufs/ufs/ufs_extern.h>
85
86 #include <ufs/ffs/fs.h>
87 #include <ufs/ffs/ffs_extern.h>
88 #if REV_ENDIAN_FS
89 #include <ufs/ufs/ufs_byte_order.h>
90 #include <architecture/byte_order.h>
91 #endif /* REV_ENDIAN_FS */
92
93 int ffs_sbupdate(struct ufsmount *, int);
94
/*
 * VFS operations vector for UFS/FFS.  Entry order is positional and
 * must match the struct vfsops layout used by the VFS layer.
 */
struct vfsops ufs_vfsops = {
	ffs_mount,		/* mount */
	ufs_start,		/* start */
	ffs_unmount,		/* unmount */
	ufs_root,		/* root */
	ufs_quotactl,		/* quotactl */
	ffs_vfs_getattr,	/* getattr */
	ffs_sync,		/* sync */
	ffs_vget,		/* vget */
	ffs_fhtovp,		/* fhtovp */
	ffs_vptofh,		/* vptofh */
	ffs_init,		/* init */
	ffs_sysctl,		/* sysctl */
	ffs_vfs_setattr,	/* setattr */
	{0}			/* remaining slots unused */
};
111
112 extern u_long nextgennumber;
113
/*
 * Helper union for splitting a 64-bit quantity into its two 32-bit
 * halves (and reassembling it) without 64-bit shift arithmetic.
 */
union _qcvt {
	int64_t qcvt;
	int32_t val[2];
};
/*
 * SETHIGH/SETLOW overwrite the high/low 32 bits of the 64-bit lvalue
 * 'q' with 'h'/'l'.  Wrapped in do { } while (0) so each macro expands
 * to a single statement and is safe in unbraced if/else bodies
 * (CERT C PRE10-C); the original bare-brace form could mis-parse there.
 */
#define SETHIGH(q, h) do { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_HIGHWORD] = (h); \
	(q) = tmp.qcvt; \
} while (0)
#define SETLOW(q, l) do { \
	union _qcvt tmp; \
	tmp.qcvt = (q); \
	tmp.val[_QUAD_LOWWORD] = (l); \
	(q) = tmp.qcvt; \
} while (0)
130
131 /*
132 * Called by main() when ufs is going to be mounted as root.
133 */
134 int
135 ffs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
136 {
137 struct proc *p = current_proc(); /* XXX */
138 int error;
139
140 /* Set asynchronous flag by default */
141 vfs_setflags(mp, MNT_ASYNC);
142
143 if (error = ffs_mountfs(rvp, mp, context))
144 return (error);
145
146 (void)ffs_statfs(mp, vfs_statfs(mp), NULL);
147
148 return (0);
149 }
150
151 /*
152 * VFS Operations.
153 *
154 * mount system call
155 */
int
ffs_mount(struct mount *mp, vnode_t devvp, __unused user_addr_t data, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct ufsmount *ump;
	register struct fs *fs;
	u_int size;		/* NOTE(review): unused in this function */
	int error = 0, flags;
	mode_t accessmode;	/* NOTE(review): unused in this function */
	int ronly;
	int reload = 0;

	/*
	 * If updating, check whether changing from read-write to
	 * read-only; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			/*
			 * Flush any dirty data.
			 */
			VFS_SYNC(mp, MNT_WAIT, context);
			/*
			 * Check for and optionally get rid of files open
			 * for writing.
			 */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (error = ffs_flushfiles(mp, flags, p))
				return (error);
			/*
			 * Mark the fs clean and push the superblock out
			 * before the mount officially becomes read-only;
			 * back both bits out if the write fails.
			 */
			fs->fs_clean = 1;
			fs->fs_ronly = 1;
			if (error = ffs_sbupdate(ump, MNT_WAIT)) {
				fs->fs_clean = 0;
				fs->fs_ronly = 0;
				return (error);
			}
		}
		/* save fs_ronly to later use */
		ronly = fs->fs_ronly;
		if ((mp->mnt_flag & MNT_RELOAD) || ronly)
			reload = 1;
		if ((reload) &&
		    (error = ffs_reload(mp, vfs_context_ucred(context), p)))
			return (error);
		/* replace the ronly after load */
		fs->fs_ronly = ronly;
		/*
		 * Do not update the file system if the user was in singleuser
		 * and then tries to mount -uw without fscking
		 */
		if (!fs->fs_clean && ronly) {
			printf("WARNING: trying to mount a dirty file system\n");
			if (issingleuser() && (mp->mnt_flag & MNT_ROOTFS)) {
				printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",fs->fs_fsmnt);
				/*
				 * Reset the readonly bit as reload might have
				 * modified this bit
				 */
				fs->fs_ronly = 1;
				return(EPERM);
			}
		}

		/* Upgrade read-only -> read-write: dirty the fs on disk. */
		if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
			fs->fs_ronly = 0;
			fs->fs_clean = 0;
			(void) ffs_sbupdate(ump, MNT_WAIT);
		}
		/* Update with no device vnode: nothing more to do. */
		if (devvp == 0) {
			return(0);
		}
	}
	if ((mp->mnt_flag & MNT_UPDATE) == 0)
		error = ffs_mountfs(devvp, mp, context);
	else {
		/* Updating: the device must be the one we mounted from. */
		if (devvp != ump->um_devvp)
			error = EINVAL;	/* needs translation */
	}
	if (error) {
		return (error);
	}
	/* Record the mount point name in the superblock and refresh stats. */
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	bzero(fs->fs_fsmnt , sizeof(fs->fs_fsmnt));
	strncpy(fs->fs_fsmnt, (caddr_t)mp->mnt_vfsstat.f_mntonname, sizeof(fs->fs_fsmnt) - 1);
	(void)ffs_statfs(mp, &mp->mnt_vfsstat, p);
	return (0);
}
248
249
/*
 * Context handed to ffs_reload_callback() through vnode_iterate().
 */
struct ffs_reload_cargs {
	struct vnode *devvp;	/* device vnode inodes are re-read from */
	kauth_cred_t cred;	/* caller's credential (not used by callback) */
	struct fs *fs;		/* in-core superblock */
	struct proc *p;		/* requesting process (not used by callback) */
	int error;		/* set by callback on read failure; else 0 */
#if REV_ENDIAN_FS
	int rev_endian;		/* non-zero if fs is opposite-endian */
#endif /* REV_ENDIAN_FS */
};
260
261
/*
 * vnode_iterate() callback for ffs_reload(): invalidate the vnode's
 * cached buffers, then re-read its on-disk dinode into the in-core
 * inode (step 6 of the reload sequence).  Returns VNODE_RETURNED to
 * continue iterating, or VNODE_RETURNED_DONE on a read error, with
 * the error reported through args->error.
 */
static int
ffs_reload_callback(struct vnode *vp, void *cargs)
{
	struct inode *ip;
	struct buf *bp;
	struct fs *fs;
	struct ffs_reload_cargs *args;

	args = (struct ffs_reload_cargs *)cargs;

	/*
	 * flush all the buffers associated with this node
	 */
	if (buf_invalidateblks(vp, 0, 0, 0))
		panic("ffs_reload: dirty2");

	/*
	 * Step 6: re-read inode data
	 */
	ip = VTOI(vp);
	fs = args->fs;

	if (args->error = (int)buf_bread(args->devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ip->i_number))),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		buf_brelse(bp);

		return (VNODE_RETURNED_DONE);
	}

#if REV_ENDIAN_FS
	if (args->rev_endian) {
		/* Opposite-endian fs: byte-swap the dinode while copying. */
		byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) +
		    ino_to_fsbo(fs, ip->i_number)), ip);
	} else {
#endif /* REV_ENDIAN_FS */
	/* Native endianness: straight structure copy from the buffer. */
	ip->i_din = *((struct dinode *)buf_dataptr(bp) +
	    ino_to_fsbo(fs, ip->i_number));
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */

	buf_brelse(bp);

	return (VNODE_RETURNED);
}
307
308
309 /*
310 * Reload all incore data for a filesystem (used after running fsck on
311 * the root filesystem and finding things to fix). The filesystem must
312 * be mounted read-only.
313 *
314 * Things to do to update the mount:
315 * 1) invalidate all cached meta-data.
316 * 2) re-read superblock from disk.
317 * 3) re-read summary information from disk.
318 * 4) invalidate all inactive vnodes.
319 * 5) invalidate all cached file data.
320 * 6) re-read inode data for all active vnodes.
321 */
322 ffs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p)
323 {
324 register struct vnode *devvp;
325 void *space;
326 struct buf *bp;
327 struct fs *fs, *newfs;
328 int i, blks, size, error;
329 u_int64_t maxfilesize; /* XXX */
330 int32_t *lp;
331 struct ffs_reload_cargs args;
332 #if REV_ENDIAN_FS
333 int rev_endian = (mountp->mnt_flag & MNT_REVEND);
334 #endif /* REV_ENDIAN_FS */
335
336 if ((mountp->mnt_flag & MNT_RDONLY) == 0)
337 return (EINVAL);
338 /*
339 * Step 1: invalidate all cached meta-data.
340 */
341 devvp = VFSTOUFS(mountp)->um_devvp;
342 if (buf_invalidateblks(devvp, 0, 0, 0))
343 panic("ffs_reload: dirty1");
344 /*
345 * Step 2: re-read superblock from disk.
346 */
347 size = vfs_devblocksize(mountp);
348
349 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)), SBSIZE, NOCRED,&bp)) {
350 buf_brelse(bp);
351 return (error);
352 }
353 newfs = (struct fs *)buf_dataptr(bp);
354 #if REV_ENDIAN_FS
355 if (rev_endian) {
356 byte_swap_sbin(newfs);
357 }
358 #endif /* REV_ENDIAN_FS */
359 if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
360 newfs->fs_bsize < sizeof(struct fs)) {
361 #if REV_ENDIAN_FS
362 if (rev_endian)
363 byte_swap_sbout(newfs);
364 #endif /* REV_ENDIAN_FS */
365
366 buf_brelse(bp);
367 return (EIO); /* XXX needs translation */
368 }
369 fs = VFSTOUFS(mountp)->um_fs;
370 /*
371 * Copy pointer fields back into superblock before copying in XXX
372 * new superblock. These should really be in the ufsmount. XXX
373 * Note that important parameters (eg fs_ncg) are unchanged.
374 */
375 newfs->fs_csp = fs->fs_csp;
376 newfs->fs_maxcluster = fs->fs_maxcluster;
377 newfs->fs_contigdirs = fs->fs_contigdirs;
378 bcopy(newfs, fs, (u_int)fs->fs_sbsize);
379 if (fs->fs_sbsize < SBSIZE)
380 buf_markinvalid(bp);
381 #if REV_ENDIAN_FS
382 if (rev_endian)
383 byte_swap_sbout(newfs);
384 #endif /* REV_ENDIAN_FS */
385 buf_brelse(bp);
386 mountp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
387 ffs_oldfscompat(fs);
388 maxfilesize = 0x100000000ULL; /* 4GB */
389 if (fs->fs_maxfilesize > maxfilesize) /* XXX */
390 fs->fs_maxfilesize = maxfilesize; /* XXX */
391 /*
392 * Step 3: re-read summary information from disk.
393 */
394 blks = howmany(fs->fs_cssize, fs->fs_fsize);
395 space = fs->fs_csp;
396 for (i = 0; i < blks; i += fs->fs_frag) {
397 size = fs->fs_bsize;
398 if (i + fs->fs_frag > blks)
399 size = (blks - i) * fs->fs_fsize;
400 if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)), size,
401 NOCRED, &bp)) {
402 buf_brelse(bp);
403 return (error);
404 }
405 #if REV_ENDIAN_FS
406 if (rev_endian) {
407 /* csum swaps */
408 byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
409 }
410 #endif /* REV_ENDIAN_FS */
411 bcopy((char *)buf_dataptr(bp), space, (u_int)size);
412 #if REV_ENDIAN_FS
413 if (rev_endian) {
414 /* csum swaps */
415 byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
416 }
417 #endif /* REV_ENDIAN_FS */
418 space = (char *) space + size;
419 buf_brelse(bp);
420 }
421 /*
422 * We no longer know anything about clusters per cylinder group.
423 */
424 if (fs->fs_contigsumsize > 0) {
425 lp = fs->fs_maxcluster;
426 for (i = 0; i < fs->fs_ncg; i++)
427 *lp++ = fs->fs_contigsumsize;
428 }
429 #if REV_ENDIAN_FS
430 args.rev_endian = rev_endian;
431 #endif /* REV_ENDIAN_FS */
432 args.devvp = devvp;
433 args.cred = cred;
434 args.fs = fs;
435 args.p = p;
436 args.error = 0;
437 /*
438 * ffs_reload_callback will be called for each vnode
439 * hung off of this mount point that can't be recycled...
440 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
441 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
442 * properly referenced and unreferenced around the callback
443 */
444 vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, ffs_reload_callback, (void *)&args);
445
446 return (args.error);
447 }
448
449 /*
450 * Common code for mount and mountroot
451 */
int
ffs_mountfs(devvp, mp, context)
	struct vnode *devvp;
	struct mount *mp;
	vfs_context_t context;
{
	struct ufsmount *ump;
	struct buf *bp;
	struct fs *fs;
	dev_t dev;
	struct buf *cgbp;
	struct cg *cgp;
	int32_t clustersumoff;
	void *space;
	int error, i, blks, ronly;
	u_int32_t size;
	int32_t *lp;
	kauth_cred_t cred;
	u_int64_t maxfilesize;					/* XXX */
	u_int dbsize = DEV_BSIZE;
#if REV_ENDIAN_FS
	int rev_endian=0;
#endif /* REV_ENDIAN_FS */
	dev = devvp->v_rdev;
	cred = vfs_context_ucred(context);

	ronly = vfs_isrdonly(mp);
	bp = NULL;
	ump = NULL;

	/* Advisory locking should be handled at the VFS layer */
	vfs_setlocklocal(mp);

	/* Obtain the actual device block size */
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&size, 0, context)) {
		error = ENXIO;
		goto out;
	}

	/* Read the superblock off the device. */
	if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)),
	    SBSIZE, cred, &bp))
		goto out;
	fs = (struct fs *)buf_dataptr(bp);
#if REV_ENDIAN_FS
	/*
	 * If the native-endian sanity check fails, see whether the
	 * superblock is valid with its bytes swapped (opposite-endian fs).
	 */
	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
	    fs->fs_bsize < sizeof(struct fs)) {
		int magic = fs->fs_magic;

		byte_swap_ints(&magic, 1);
		if (magic != FS_MAGIC) {
			error = EINVAL;
			goto out;
		}
		byte_swap_sbin(fs);
		if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
		    fs->fs_bsize < sizeof(struct fs)) {
			byte_swap_sbout(fs);
			error = EINVAL;	/* XXX needs translation */
			goto out;
		}
		rev_endian=1;
	}
#endif /* REV_ENDIAN_FS */
	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
	    fs->fs_bsize < sizeof(struct fs)) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = EINVAL;	/* XXX needs translation */
		goto out;
	}


	/*
	 * Buffer cache does not handle multiple pages in a buf when
	 * invalidating incore buffer in pageout. There are no locks
	 * in the pageout path.  So there is a danger of losing data when
	 * block allocation happens at the same time a pageout of buddy
	 * page occurs. incore() returns buf with both
	 * pages, this leads vnode-pageout to incorrectly flush of entire.
	 * buf. Till the low level ffs code is modified to deal with these
	 * do not mount any FS more than 4K size.
	 */
	/*
	 * Can't mount filesystems with a fragment size less than DIRBLKSIZ
	 */
	/*
	 * Don't mount dirty filesystems, except for the root filesystem
	 */
	if ((fs->fs_bsize > PAGE_SIZE) || (fs->fs_fsize < DIRBLKSIZ) ||
	    ((!(mp->mnt_flag & MNT_ROOTFS)) && (!fs->fs_clean))) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = ENOTSUP;
		goto out;
	}

	/* Let's figure out the devblock size the file system is with */
	/* the device block size = fragment size / number of sectors per frag */

	dbsize = fs->fs_fsize / NSPF(fs);
	if(dbsize <= 0 ) {
		kprintf("device blocksize computaion failed\n");
	} else {
		if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&dbsize,
		    FWRITE, context) != 0) {
			kprintf("failed to set device blocksize\n");
		}
		/* force the specfs to reread blocksize from size() */
		set_fsblocksize(devvp);
	}

	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
		error = EROFS;	/* needs translation */
		goto out;
	}

	/* If we are not mounting read only, then check for overlap
	 * condition in cylinder group's free block map.
	 * If overlap exists, then force this into a read only mount
	 * to avoid further corruption. PR#2216969
	 */
	if (ronly == 0){
		if (error = (int)buf_bread (devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, 0))),
		    (int)fs->fs_cgsize, NOCRED, &cgbp)) {
			buf_brelse(cgbp);
			goto out;
		}
		cgp = (struct cg *)buf_dataptr(cgbp);
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgin(cgp,fs);
#endif /* REV_ENDIAN_FS */
		if (!cg_chkmagic(cgp)){
			/*
			 * NOTE(review): 'error' is still 0 here (the
			 * buf_bread above succeeded), so this failure
			 * path appears to return success from 'out:' —
			 * confirm whether an explicit error (e.g. EINVAL)
			 * was intended.
			 */
#if REV_ENDIAN_FS
			if (rev_endian)
				byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
			buf_brelse(cgbp);
			goto out;
		}
		if (cgp->cg_clustersumoff != 0) {
			/* Check for overlap */
			clustersumoff = cgp->cg_freeoff +
			    howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY);
			clustersumoff = roundup(clustersumoff, sizeof(long));
			if (cgp->cg_clustersumoff < clustersumoff) {
				/* Overlap exists */
				mp->mnt_flag |= MNT_RDONLY;
				ronly = 1;
			}
		}
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_cgout(cgp,fs);
#endif /* REV_ENDIAN_FS */
		buf_brelse(cgbp);
	}

	/* Allocate the ufsmount and a private, native-endian superblock. */
	ump = _MALLOC(sizeof *ump, M_UFSMNT, M_WAITOK);
	bzero((caddr_t)ump, sizeof *ump);
	ump->um_fs = _MALLOC((u_long)fs->fs_sbsize, M_UFSMNT,
	    M_WAITOK);
	bcopy((char *)buf_dataptr(bp), ump->um_fs, (u_int)fs->fs_sbsize);
	if (fs->fs_sbsize < SBSIZE)
		buf_markinvalid(bp);
#if REV_ENDIAN_FS
	if (rev_endian)
		byte_swap_sbout(fs);
#endif /* REV_ENDIAN_FS */
	buf_brelse(bp);
	bp = NULL;
	fs = ump->um_fs;
	fs->fs_ronly = ronly;
	/*
	 * Size the summary area: csum blocks, plus per-cg cluster
	 * counts when clustering is enabled, plus per-cg contigdirs.
	 */
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(u_int8_t);
	space = _MALLOC((u_long)size, M_UFSMNT, M_WAITOK);
	fs->fs_csp = space;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
		    size, cred, &bp)) {
			_FREE(fs->fs_csp, M_UFSMNT);
			goto out;
		}
		bcopy((char *)buf_dataptr(bp), space, (u_int)size);
#if REV_ENDIAN_FS
		if (rev_endian)
			byte_swap_ints((int *) space, size / sizeof(int));
#endif /* REV_ENDIAN_FS */
		space = (char *)space + size;
		buf_brelse(bp);
		bp = NULL;
	}
	if (fs->fs_contigsumsize > 0) {
		fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = lp;
	}
	size = fs->fs_ncg * sizeof(u_int8_t);
	fs->fs_contigdirs = (u_int8_t *)space;
	space = (u_int8_t *)space + size;
	bzero(fs->fs_contigdirs, size);
	/* XXX Compatibility for old filesystems */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;
	/* XXX End of compatibility */
	/* Wire the ufsmount into the mount structure. */
	mp->mnt_data = (qaddr_t)ump;
	mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
	mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
	/* XXX warning hardcoded max symlen and not "mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;" */
	mp->mnt_maxsymlinklen = 60;
#if REV_ENDIAN_FS
	if (rev_endian)
		mp->mnt_flag |= MNT_REVEND;
#endif /* REV_ENDIAN_FS */
	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_nindir = fs->fs_nindir;
	ump->um_bptrtodb = fs->fs_fsbtodb;
	ump->um_seqinc = fs->fs_frag;
	for (i = 0; i < MAXQUOTAS; i++)
		dqfileinit(&ump->um_qfiles[i]);
	ffs_oldfscompat(fs);
	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
	maxfilesize = 0x100000000ULL;	/* 4GB */
#if 0
	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
#endif /* 0 */
	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
		fs->fs_maxfilesize = maxfilesize;		/* XXX */
	/* Read-write mount: mark the fs dirty until cleanly unmounted. */
	if (ronly == 0) {
		fs->fs_clean = 0;
		(void) ffs_sbupdate(ump, MNT_WAIT);
	}
	return (0);
out:
	/* Common error exit: release superblock buffer and allocations. */
	if (bp)
		buf_brelse(bp);
	if (ump) {
		_FREE(ump->um_fs, M_UFSMNT);
		_FREE(ump, M_UFSMNT);
	}
	return (error);
}
714
715 /*
716 * Sanity checks for old file systems.
717 *
718 * XXX - goes away some day.
719 */
720 ffs_oldfscompat(fs)
721 struct fs *fs;
722 {
723 int i;
724
725 fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */
726 fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */
727 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */
728 fs->fs_nrpos = 8; /* XXX */
729 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
730 u_int64_t sizepb = fs->fs_bsize; /* XXX */
731 /* XXX */
732 fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */
733 for (i = 0; i < NIADDR; i++) { /* XXX */
734 sizepb *= NINDIR(fs); /* XXX */
735 fs->fs_maxfilesize += sizepb; /* XXX */
736 } /* XXX */
737 fs->fs_qbmask = ~fs->fs_bmask; /* XXX */
738 fs->fs_qfmask = ~fs->fs_fmask; /* XXX */
739 } /* XXX */
740 return (0);
741 }
742
743 /*
744 * unmount system call
745 */
int
ffs_unmount(mp, mntflags, context)
	struct mount *mp;
	int mntflags;
	vfs_context_t context;
{
	struct proc *p = vfs_context_proc(context);
	register struct ufsmount *ump;
	register struct fs *fs;
	int error, flags;
	int force;

	flags = 0;
	force = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		force = 1;
	}
	/* Flush all files; a forced unmount proceeds despite errors. */
	if ( (error = ffs_flushfiles(mp, flags, p)) && !force )
		return (error);
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;

	/* Read-write mount: mark the fs clean and push the superblock. */
	if (fs->fs_ronly == 0) {
		fs->fs_clean = 1;
		if (error = ffs_sbupdate(ump, MNT_WAIT)) {
			fs->fs_clean = 0;
#ifdef notyet
			/* we can atleast cleanup ; as the media could be WP */
			/* & during mount, we do not check for write failures */
			/* FIXME LATER : the Correct fix would be to have */
			/* mount detect the WP media and downgrade to readonly mount */
			/* For now, here it is */
			return (error);
#endif /* notyet */
		}
	}
	/* Release summary info, the in-core superblock, and the ufsmount. */
	_FREE(fs->fs_csp, M_UFSMNT);
	_FREE(fs, M_UFSMNT);
	_FREE(ump, M_UFSMNT);

	return (0);
}
789
790 /*
791 * Flush out all the files in a filesystem.
792 */
793 ffs_flushfiles(mp, flags, p)
794 register struct mount *mp;
795 int flags;
796 struct proc *p;
797 {
798 register struct ufsmount *ump;
799 int i, error;
800
801 ump = VFSTOUFS(mp);
802
803 #if QUOTA
804 /*
805 * NOTE: The open quota files have an indirect reference
806 * on the root directory vnode. We must account for this
807 * extra reference when doing the intial vflush.
808 */
809 if (mp->mnt_flag & MNT_QUOTA) {
810 struct vnode *rootvp = NULLVP;
811 int quotafilecnt = 0;
812
813 /* Find out how many quota files we have open. */
814 for (i = 0; i < MAXQUOTAS; i++) {
815 if (ump->um_qfiles[i].qf_vp != NULLVP)
816 ++quotafilecnt;
817 }
818
819 /*
820 * Check if the root vnode is in our inode hash
821 * (so we can skip over it).
822 */
823 rootvp = ufs_ihashget(ump->um_dev, ROOTINO);
824
825 error = vflush(mp, rootvp, SKIPSYSTEM|flags);
826
827 if (rootvp) {
828 /*
829 * See if there are additional references on the
830 * root vp besides the ones obtained from the open
831 * quota files and the hfs_chashget call above.
832 */
833 if ((error == 0) &&
834 (rootvp->v_usecount > (1 + quotafilecnt))) {
835 error = EBUSY; /* root dir is still open */
836 }
837 vnode_put(rootvp);
838 }
839 if (error && (flags & FORCECLOSE) == 0)
840 return (error);
841
842 for (i = 0; i < MAXQUOTAS; i++) {
843 if (ump->um_qfiles[i].qf_vp == NULLVP)
844 continue;
845 quotaoff(mp, i);
846 }
847 /*
848 * Here we fall through to vflush again to ensure
849 * that we have gotten rid of all the system vnodes.
850 */
851 }
852 #endif
853 error = vflush(mp, NULLVP, SKIPSWAP|flags);
854 error = vflush(mp, NULLVP, flags);
855 return (error);
856 }
857
858 /*
859 * Get file system statistics.
860 */
861 int
862 ffs_statfs(mp, sbp, context)
863 struct mount *mp;
864 register struct vfsstatfs *sbp;
865 vfs_context_t context;
866 {
867 register struct ufsmount *ump;
868 register struct fs *fs;
869
870 ump = VFSTOUFS(mp);
871 fs = ump->um_fs;
872 if (fs->fs_magic != FS_MAGIC)
873 panic("ffs_statfs");
874 sbp->f_bsize = fs->fs_fsize;
875 sbp->f_iosize = fs->fs_bsize;
876 sbp->f_blocks = (uint64_t)((unsigned long)fs->fs_dsize);
877 sbp->f_bfree = (uint64_t) ((unsigned long)(fs->fs_cstotal.cs_nbfree * fs->fs_frag +
878 fs->fs_cstotal.cs_nffree));
879 sbp->f_bavail = (uint64_t) ((unsigned long)freespace(fs, fs->fs_minfree));
880 sbp->f_files = (uint64_t) ((unsigned long)(fs->fs_ncg * fs->fs_ipg - ROOTINO));
881 sbp->f_ffree = (uint64_t) ((unsigned long)fs->fs_cstotal.cs_nifree);
882 return (0);
883 }
884
int
ffs_vfs_getattr(mp, fsap, context)
	struct mount *mp;
	struct vfs_attr *fsap;
	vfs_context_t context;
{
	struct ufsmount *ump;
	struct fs *fs;
	kauth_cred_t cred;
	struct vnode *devvp;
	struct buf *bp;
	struct ufslabel *ulp;
	char *offset;
	int bs, error, length;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	cred = vfs_context_ucred(context);

	/* Basic space/inode statistics straight from the superblock. */
	VFSATTR_RETURN(fsap, f_bsize, fs->fs_fsize);
	VFSATTR_RETURN(fsap, f_iosize, fs->fs_bsize);
	VFSATTR_RETURN(fsap, f_blocks, (uint64_t)((unsigned long)fs->fs_dsize));
	VFSATTR_RETURN(fsap, f_bfree, (uint64_t)((unsigned long)
	    (fs->fs_cstotal.cs_nbfree * fs->fs_frag +
	    fs->fs_cstotal.cs_nffree)));
	VFSATTR_RETURN(fsap, f_bavail, (uint64_t)((unsigned long)freespace(fs,
	    fs->fs_minfree)));
	VFSATTR_RETURN(fsap, f_files, (uint64_t)((unsigned long)
	    (fs->fs_ncg * fs->fs_ipg - ROOTINO)));
	VFSATTR_RETURN(fsap, f_ffree, (uint64_t)((unsigned long)
	    fs->fs_cstotal.cs_nifree));

	if (VFSATTR_IS_ACTIVE(fsap, f_fsid)) {
		fsap->f_fsid.val[0] = mp->mnt_vfsstat.f_fsid.val[0];
		fsap->f_fsid.val[1] = mp->mnt_vfsstat.f_fsid.val[1];
		VFSATTR_SET_SUPPORTED(fsap, f_fsid);
	}

	/* Volume name lives in the on-disk UFS label; read it directly. */
	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		devvp = ump->um_devvp;
		bs = vfs_devblocksize(mp);

		if (error = (int)buf_meta_bread(devvp,
		    (daddr64_t)(UFS_LABEL_OFFSET / bs),
		    MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
			if (bp)
				buf_brelse(bp);
			return (error);
		}

		/*
		 * Since the disklabel is read directly by older user space
		 * code, make sure this buffer won't remain in the cache when
		 * we release it.
		 */
		buf_setflags(bp, B_NOCACHE);

		offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
		ulp = (struct ufslabel *)offset;

		if (ufs_label_check(ulp)) {
			length = ulp->ul_namelen;
#if REV_ENDIAN_FS
			/* Label fields are stored in on-disk byte order. */
			if (mp->mnt_flag & MNT_REVEND)
				length = NXSwapShort(length);
#endif
			if (length > 0 && length <= UFS_MAX_LABEL_NAME) {
				bcopy(ulp->ul_name, fsap->f_vol_name, length);
				fsap->f_vol_name[UFS_MAX_LABEL_NAME - 1] = '\0';
				/*
				 * NOTE(review): when length == UFS_MAX_LABEL_NAME
				 * this writes one byte past index MAX-1; confirm
				 * f_vol_name is larger than UFS_MAX_LABEL_NAME.
				 */
				fsap->f_vol_name[length] = '\0';
			}
		}

		buf_brelse(bp);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}

	/* Static capability mask for UFS/FFS volumes. */
	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] =
		    VOL_CAP_FMT_SYMBOLICLINKS |
		    VOL_CAP_FMT_HARDLINKS |
		    VOL_CAP_FMT_SPARSE_FILES |
		    VOL_CAP_FMT_CASE_SENSITIVE |
		    VOL_CAP_FMT_CASE_PRESERVING |
		    VOL_CAP_FMT_FAST_STATFS ;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES]
		    = VOL_CAP_INT_NFSEXPORT |
		    VOL_CAP_INT_VOL_RENAME |
		    VOL_CAP_INT_ADVLOCK |
		    VOL_CAP_INT_FLOCK;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1]
		    = 0;
		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2]
		    = 0;

		/* Capabilities we know about: */
		fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] =
		    VOL_CAP_FMT_PERSISTENTOBJECTIDS |
		    VOL_CAP_FMT_SYMBOLICLINKS |
		    VOL_CAP_FMT_HARDLINKS |
		    VOL_CAP_FMT_JOURNAL |
		    VOL_CAP_FMT_JOURNAL_ACTIVE |
		    VOL_CAP_FMT_NO_ROOT_TIMES |
		    VOL_CAP_FMT_SPARSE_FILES |
		    VOL_CAP_FMT_ZERO_RUNS |
		    VOL_CAP_FMT_CASE_SENSITIVE |
		    VOL_CAP_FMT_CASE_PRESERVING |
		    VOL_CAP_FMT_FAST_STATFS |
		    VOL_CAP_FMT_2TB_FILESIZE;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] =
		    VOL_CAP_INT_SEARCHFS |
		    VOL_CAP_INT_ATTRLIST |
		    VOL_CAP_INT_NFSEXPORT |
		    VOL_CAP_INT_READDIRATTR |
		    VOL_CAP_INT_EXCHANGEDATA |
		    VOL_CAP_INT_COPYFILE |
		    VOL_CAP_INT_ALLOCATE |
		    VOL_CAP_INT_VOL_RENAME |
		    VOL_CAP_INT_ADVLOCK |
		    VOL_CAP_INT_FLOCK ;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0;
		fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0;

		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}

	/* Only volume-level attributes are supported via getattrlist. */
	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		fsap->f_attributes.validattr.commonattr = 0;
		fsap->f_attributes.validattr.volattr =
		    ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
		fsap->f_attributes.validattr.dirattr = 0;
		fsap->f_attributes.validattr.fileattr = 0;
		fsap->f_attributes.validattr.forkattr = 0;

		fsap->f_attributes.nativeattr.commonattr = 0;
		fsap->f_attributes.nativeattr.volattr =
		    ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES;
		fsap->f_attributes.nativeattr.dirattr = 0;
		fsap->f_attributes.nativeattr.fileattr = 0;
		fsap->f_attributes.nativeattr.forkattr = 0;

		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}

	return (0);
}
1031
1032
1033 int
1034 ffs_vfs_setattr(mp, fsap, context)
1035 struct mount *mp;
1036 struct vfs_attr *fsap;
1037 vfs_context_t context;
1038 {
1039 struct ufsmount *ump;
1040 struct vnode *devvp;
1041 struct buf *bp;
1042 struct ufslabel *ulp;
1043 kauth_cred_t cred;
1044 char *offset;
1045 int bs, error;
1046
1047
1048 ump = VFSTOUFS(mp);
1049 cred = vfs_context_ucred(context);
1050
1051 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
1052 devvp = ump->um_devvp;
1053 bs = vfs_devblocksize(mp);
1054 if (error = buf_meta_bread(devvp,
1055 (daddr64_t)(UFS_LABEL_OFFSET / bs),
1056 MAX(bs, UFS_LABEL_SIZE), cred, &bp)) {
1057 if (bp)
1058 buf_brelse(bp);
1059 return (error);
1060 }
1061
1062 /*
1063 * Since the disklabel is read directly by older user space
1064 * code, make sure this buffer won't remain in the cache when
1065 * we release it.
1066 */
1067 buf_setflags(bp, B_NOCACHE);
1068
1069 /* Validate the label structure; init if not valid */
1070 offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs);
1071 ulp = (struct ufslabel *)offset;
1072 if (!ufs_label_check(ulp))
1073 ufs_label_init(ulp);
1074
1075 /* Copy new name over existing name */
1076 ulp->ul_namelen = strlen(fsap->f_vol_name);
1077 #if REV_ENDIAN_FS
1078 if (mp->mnt_flag & MNT_REVEND)
1079 ulp->ul_namelen = NXSwapShort(ulp->ul_namelen);
1080 #endif
1081 bcopy(fsap->f_vol_name, ulp->ul_name, ulp->ul_namelen);
1082 ulp->ul_name[UFS_MAX_LABEL_NAME - 1] = '\0';
1083 ulp->ul_name[ulp->ul_namelen] = '\0';
1084
1085 /* Update the checksum */
1086 ulp->ul_checksum = 0;
1087 ulp->ul_checksum = ul_cksum(ulp, sizeof(*ulp));
1088
1089 /* Write the label back to disk */
1090 buf_bwrite(bp);
1091 bp = NULL;
1092
1093 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
1094 }
1095
1096 return (0);
1097 }
/*
 * Argument bundle passed from ffs_sync() to ffs_sync_callback()
 * through vnode_iterate()'s opaque callback-argument pointer.
 */
struct ffs_sync_cargs {
	vfs_context_t context;	/* caller's VFS context, handed to VNOP_FSYNC */
	int waitfor;		/* sync mode forwarded to VNOP_FSYNC */
	int error;		/* latest VNOP_FSYNC error seen (0 if none) */
};
1103
1104
1105 static int
1106 ffs_sync_callback(struct vnode *vp, void *cargs)
1107 {
1108 struct inode *ip;
1109 struct ffs_sync_cargs *args;
1110 int error;
1111
1112 args = (struct ffs_sync_cargs *)cargs;
1113
1114 ip = VTOI(vp);
1115
1116 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) || vnode_hasdirtyblks(vp)) {
1117 error = VNOP_FSYNC(vp, args->waitfor, args->context);
1118
1119 if (error)
1120 args->error = error;
1121
1122 }
1123 return (VNODE_RETURNED);
1124 }
1125
1126 /*
1127 * Go through the disk queues to initiate sandbagged IO;
1128 * go through the inodes to write those that have been modified;
1129 * initiate the writing of the super block if it has been modified.
1130 *
1131 * Note: we are always called with the filesystem marked `MPBUSY'.
1132 */
1133 int
1134 ffs_sync(mp, waitfor, context)
1135 struct mount *mp;
1136 int waitfor;
1137 vfs_context_t context;
1138 {
1139 struct vnode *nvp, *vp;
1140 struct ufsmount *ump = VFSTOUFS(mp);
1141 struct fs *fs;
1142 struct timeval tv;
1143 int error, allerror = 0;
1144 struct ffs_sync_cargs args;
1145
1146 fs = ump->um_fs;
1147 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */
1148 printf("fs = %s\n", fs->fs_fsmnt);
1149 panic("update: rofs mod");
1150 }
1151 /*
1152 * Write back each (modified) inode.
1153 */
1154 args.context = context;
1155 args.waitfor = waitfor;
1156 args.error = 0;
1157 /*
1158 * ffs_sync_callback will be called for each vnode
1159 * hung off of this mount point... the vnode will be
1160 * properly referenced and unreferenced around the callback
1161 */
1162 vnode_iterate(mp, 0, ffs_sync_callback, (void *)&args);
1163
1164 if (args.error)
1165 allerror = args.error;
1166
1167 /*
1168 * Force stale file system control information to be flushed.
1169 */
1170 if (error = VNOP_FSYNC(ump->um_devvp, waitfor, context))
1171 allerror = error;
1172 #if QUOTA
1173 qsync(mp);
1174 #endif
1175 /*
1176 * Write back modified superblock.
1177 */
1178 if (fs->fs_fmod != 0) {
1179 fs->fs_fmod = 0;
1180 microtime(&tv);
1181 fs->fs_time = tv.tv_sec;
1182 if (error = ffs_sbupdate(ump, waitfor))
1183 allerror = error;
1184 }
1185 return (allerror);
1186 }
1187
1188 /*
1189 * Look up a FFS dinode number to find its incore vnode, otherwise read it
1190 * in from disk. If it is in core, wait for the lock bit to clear, then
1191 * return the inode locked. Detection and handling of mount points must be
1192 * done by the calling routine.
1193 */
1194 int
1195 ffs_vget(mp, ino, vpp, context)
1196 mount_t mp;
1197 ino64_t ino;
1198 vnode_t *vpp;
1199 vfs_context_t context;
1200 {
1201 return(ffs_vget_internal(mp, (ino_t)ino, vpp, NULL, NULL, 0, 0));
1202 }
1203
1204
/*
 * Core lookup/creation of an in-core inode and its vnode.
 *
 * Returns (in *vpp) a vnode for inode `ino' on mount `mp'.  If the
 * inode is already in the inode hash, that vnode is returned at once;
 * otherwise a fresh inode is allocated, inserted into the hash marked
 * IN_ALLOC (so concurrent requests for the same inode block), filled
 * from the on-disk dinode, and a new vnode is created for it.
 *
 * dvp/cnp:  optional parent vnode and component name from a lookup or
 *           create path; when both are present with MAKEENTRY set, the
 *           VNFS_NOCACHE flag is cleared (presumably allowing the new
 *           vnode into the name cache — confirm against vnode_create).
 * mode:     if non-zero, the mode bits used to derive the vnode type
 *           (for newly created inodes whose on-disk i_mode is not yet
 *           valid); zero means derive the type from i_mode.
 * fhwanted: non-zero when called for NFS file-handle translation; an
 *           unallocated inode then yields ESTALE instead of ENOENT.
 *
 * Returns 0 with *vpp set, or an errno with *vpp left NULL.
 */
int
ffs_vget_internal(mp, ino, vpp, dvp, cnp, mode, fhwanted)
	mount_t mp;
	ino_t ino;
	vnode_t *vpp;
	vnode_t dvp;
	struct componentname *cnp;
	int mode;
	int fhwanted;
{
	struct proc *p = current_proc();	/* XXX */
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct vnode_fsparam vfsp;
	struct timeval tv;
	enum vtype vtype;
	dev_t dev;
	int i, type, error = 0;

	*vpp = NULL;
	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
#if 0
	/* Check for unmount in progress */
	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		return (EPERM);
	}
#endif
	/*
	 * Allocate a new inode... do it before we check the
	 * cache, because the MALLOC_ZONE may block
	 */
	type = M_FFSNODE;
	MALLOC_ZONE(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);

	/*
	 * check in the inode hash
	 */
	if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
		/*
		 * found it... get rid of the allocation
		 * that we didn't need and return
		 * the 'found' vnode
		 */
		FREE_ZONE(ip, sizeof(struct inode), type);
		vp = *vpp;	/* NOTE(review): vp is otherwise unused here */
		return (0);
	}
	bzero((caddr_t)ip, sizeof(struct inode));
	/*
	 * lock the inode
	 */
	// lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
	// lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct slock *)0, p);

	ip->i_fs = fs = ump->um_fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#if QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		ip->i_dquot[i] = NODQUOT;
#endif
	/* IN_ALLOC marks the inode as under construction; see errout. */
	SET(ip->i_flag, IN_ALLOC);
	/*
	 * Put it onto its hash chain locked so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ufs_ihashins(ip);

	/* Read in the disk contents for the inode, copy into the inode. */
	if (error = (int)buf_bread(ump->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ino))),
				(int)fs->fs_bsize, NOCRED, &bp)) {
		buf_brelse(bp);
		goto errout;
	}
#if REV_ENDIAN_FS
	/* Opposite-endian volumes need the dinode swapped on the way in. */
	if (mp->mnt_flag & MNT_REVEND) {
		byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino)),ip);
	} else {
		ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
	}
#else
	ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino));
#endif /* REV_ENDIAN_FS */
	buf_brelse(bp);

	/* Derive the vnode type from the on-disk mode or the caller's hint. */
	if (mode == 0)
		vtype = IFTOVT(ip->i_mode);
	else
		vtype = IFTOVT(mode);

	if (vtype == VNON) {
		if (fhwanted) {
			/* NFS is in play */
			error = ESTALE;
			goto errout;
		} else {
			error = ENOENT;
			goto errout;
		}
	}

	/* Build the vnode_create parameter block. */
	vfsp.vnfs_mp = mp;
	vfsp.vnfs_vtype = vtype;
	vfsp.vnfs_str = "ufs";
	vfsp.vnfs_dvp = dvp;
	vfsp.vnfs_fsnode = ip;
	vfsp.vnfs_cnp = cnp;

	/* A freshly created inode (mode != 0) starts with zero length. */
	if (mode == 0)
		vfsp.vnfs_filesize = ip->i_din.di_size;
	else
		vfsp.vnfs_filesize = 0;

	/* Select the vnode operations vector by type. */
	if (vtype == VFIFO )
		vfsp.vnfs_vops = FFS_FIFOOPS;
	else if (vtype == VBLK || vtype == VCHR)
		vfsp.vnfs_vops = ffs_specop_p;
	else
		vfsp.vnfs_vops = ffs_vnodeop_p;

	if (vtype == VBLK || vtype == VCHR)
		vfsp.vnfs_rdev = ip->i_rdev;
	else
		vfsp.vnfs_rdev = 0;

	if (dvp && cnp && (cnp->cn_flags & MAKEENTRY))
		vfsp.vnfs_flags = 0;
	else
		vfsp.vnfs_flags = VNFS_NOCACHE;

	/*
	 * Tag root directory
	 */
	vfsp.vnfs_markroot = (ip->i_number == ROOTINO);
	vfsp.vnfs_marksystem = 0;

	if ((error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp)))
		goto errout;

	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */
	ip->i_devvp = ump->um_devvp;
	ip->i_vnode = vp;

	vnode_ref(ip->i_devvp);
	vnode_addfsref(vp);
	vnode_settag(vp, VT_UFS);

	/*
	 * Initialize modrev times
	 */
	microtime(&tv);
	SETHIGH(ip->i_modrev, tv.tv_sec);
	SETLOW(ip->i_modrev, tv.tv_usec * 4294);

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)tv.tv_sec)
			nextgennumber = tv.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IN_MODIFIED;
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
	}						/* XXX */
	*vpp = vp;

	/* Construction done: clear IN_ALLOC and wake anyone waiting on it. */
	CLR(ip->i_flag, IN_ALLOC);

	if (ISSET(ip->i_flag, IN_WALLOC))
		wakeup(ip);

	return (0);

errout:
	/*
	 * Failure after the hash insert: remove the half-built inode from
	 * the hash, wake any waiters blocked on IN_ALLOC, and free it.
	 */
	ufs_ihashrem(ip);

	if (ISSET(ip->i_flag, IN_WALLOC))
		wakeup(ip);
	FREE_ZONE(ip, sizeof(struct inode), type);

	return (error);
}
1404
1405 /*
1406 * File handle to vnode
1407 *
1408 * Have to be really careful about stale file handles:
1409 * - check that the inode number is valid
1410 * - call vget to get the locked inode
1411 * - check for an unallocated inode (i_mode == 0)
1412 */
1413 int
1414 ffs_fhtovp(mp, fhlen, fhp, vpp, context)
1415 register struct mount *mp;
1416 int fhlen;
1417 unsigned char *fhp;
1418 struct vnode **vpp;
1419 vfs_context_t context;
1420 {
1421 register struct ufid *ufhp;
1422 register struct inode *ip;
1423 struct vnode *nvp;
1424 struct fs *fs;
1425 int error;
1426
1427 if (fhlen < (int)sizeof(struct ufid))
1428 return (EINVAL);
1429 ufhp = (struct ufid *)fhp;
1430 fs = VFSTOUFS(mp)->um_fs;
1431 if (ufhp->ufid_ino < ROOTINO ||
1432 ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1433 return (ESTALE);
1434 error = ffs_vget_internal(mp, ufhp->ufid_ino, &nvp, NULL, NULL, 0, 1);
1435 if (error) {
1436 *vpp = NULLVP;
1437 return (error);
1438 }
1439 ip = VTOI(nvp);
1440 if (ip->i_mode == 0 || ip->i_gen != ufhp->ufid_gen) {
1441 vnode_put(nvp);
1442 *vpp = NULLVP;
1443 return (ESTALE);
1444 }
1445 *vpp = nvp;
1446 return (0);
1447 }
1448
1449 /*
1450 * Vnode pointer to File handle
1451 */
1452 /* ARGSUSED */
1453 int
1454 ffs_vptofh(vp, fhlenp, fhp, context)
1455 struct vnode *vp;
1456 int *fhlenp;
1457 unsigned char *fhp;
1458 vfs_context_t context;
1459 {
1460 register struct inode *ip;
1461 register struct ufid *ufhp;
1462
1463 if (*fhlenp < (int)sizeof(struct ufid))
1464 return (EOVERFLOW);
1465 ip = VTOI(vp);
1466 ufhp = (struct ufid *)fhp;
1467 ufhp->ufid_ino = ip->i_number;
1468 ufhp->ufid_gen = ip->i_gen;
1469 *fhlenp = sizeof(struct ufid);
1470 return (0);
1471 }
1472
/*
 * Filesystem module initialization; FFS simply defers to ufs_init().
 */
int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	return (ufs_init(vfsp));
}
1483
1484 /*
1485 * fast filesystem related variables.
1486 */
1487 ffs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
1488 user_addr_t newp, size_t newlen, vfs_context_t context)
1489 {
1490 extern int doclusterread, doclusterwrite, doreallocblks, doasyncfree;
1491
1492 /* all sysctl names at this level are terminal */
1493 if (namelen != 1)
1494 return (ENOTDIR); /* overloaded */
1495
1496 switch (name[0]) {
1497 case FFS_CLUSTERREAD:
1498 return (sysctl_int(oldp, oldlenp, newp, newlen,
1499 &doclusterread));
1500 case FFS_CLUSTERWRITE:
1501 return (sysctl_int(oldp, oldlenp, newp, newlen,
1502 &doclusterwrite));
1503 case FFS_REALLOCBLKS:
1504 return (sysctl_int(oldp, oldlenp, newp, newlen,
1505 &doreallocblks));
1506 case FFS_ASYNCFREE:
1507 return (sysctl_int(oldp, oldlenp, newp, newlen, &doasyncfree));
1508 default:
1509 return (ENOTSUP);
1510 }
1511 /* NOTREACHED */
1512 }
1513
/*
 * Write a superblock and associated information back to disk.
 *
 * First flushes the cylinder-group summary blocks, then the superblock
 * itself; if any summary write failed, the superblock write is skipped
 * so the filesystem is never marked clean over bad summary data.
 * With waitfor == MNT_WAIT the writes are synchronous and errors are
 * collected; otherwise they are issued asynchronously.
 *
 * Returns 0 on success or the last write error encountered.
 */
int
ffs_sbupdate(mp, waitfor)
	struct ufsmount *mp;
	int waitfor;
{
	register struct fs *dfs, *fs = mp->um_fs;
	register struct buf *bp;
	int blks;
	void *space;
	int i, size, error, allerror = 0;
	int devBlockSize=0;
#if REV_ENDIAN_FS
	int rev_endian=(mp->um_mountp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	/*
	 * First write back the summary information.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		/* Full filesystem block per pass, short block at the tail. */
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)),
			size, 0, 0, BLK_META);
		bcopy(space, (char *)buf_dataptr(bp), (u_int)size);
#if REV_ENDIAN_FS
		/* Summary data is stored opposite-endian on MNT_REVEND. */
		if (rev_endian) {
			byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int));
		}
#endif /* REV_ENDIAN_FS */
		space = (char *)space + size;
		if (waitfor != MNT_WAIT)
			buf_bawrite(bp);
		else if (error = (int)buf_bwrite(bp))
			allerror = error;
	}
	/*
	 * Now write back the superblock itself. If any errors occurred
	 * up to this point, then fail so that the superblock avoids
	 * being written out as clean.
	 */
	if (allerror)
		return (allerror);
	devBlockSize = vfs_devblocksize(mp->um_mountp);

	bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)(SBOFF/devBlockSize)), (int)fs->fs_sbsize, 0, 0, BLK_META);
	bcopy((caddr_t)fs, (char *)buf_dataptr(bp), (u_int)fs->fs_sbsize);
	/* Restore compatibility to old file systems.		   XXX */
	dfs = (struct fs *)buf_dataptr(bp);			/* XXX */
	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
		dfs->fs_nrpos = -1;				/* XXX */
#if REV_ENDIAN_FS
	/*
	 * Swapping bytes here ; so that in case
	 * of inode format < FS_44INODEFMT appropriate
	 * fields get moved
	 */
	if (rev_endian) {
		byte_swap_sbout((struct fs *)buf_dataptr(bp));
	}
#endif /* REV_ENDIAN_FS */
	/*
	 * Pre-4.4 inode formats keep the fs_qbmask group of fields in a
	 * rotated order on disk; rotate the five words to match.	XXX
	 */
	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
		int32_t *lp, tmp;				/* XXX */
								/* XXX */
		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
		tmp = lp[4];					/* XXX */
		for (i = 4; i > 0; i--)				/* XXX */
			lp[i] = lp[i-1];			/* XXX */
		lp[0] = tmp;					/* XXX */
	}							/* XXX */
#if REV_ENDIAN_FS
	/* Note that dfs is already swapped so swap the filesize
	 * before writing
	 */
	if (rev_endian) {
		dfs->fs_maxfilesize = NXSwapLongLong(mp->um_savedmaxfilesize);	/* XXX */
	} else {
#endif /* REV_ENDIAN_FS */
		dfs->fs_maxfilesize = mp->um_savedmaxfilesize;	/* XXX */
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */
	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else if (error = (int)buf_bwrite(bp))
		allerror = error;

	return (allerror);
}