bsd/miscfs/specfs/spec_vnops.c

   1 /*
   2  * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  29 /*
  30  * Copyright (c) 1989, 1993, 1995
  31  *      The Regents of the University of California.  All rights reserved.
  32  *
  33  * Redistribution and use in source and binary forms, with or without
  34  * modification, are permitted provided that the following conditions
  35  * are met:
  36  * 1. Redistributions of source code must retain the above copyright
  37  *    notice, this list of conditions and the following disclaimer.
  38  * 2. Redistributions in binary form must reproduce the above copyright
  39  *    notice, this list of conditions and the following disclaimer in the
  40  *    documentation and/or other materials provided with the distribution.
  41  * 3. All advertising materials mentioning features or use of this software
  42  *    must display the following acknowledgement:
  43  *      This product includes software developed by the University of
  44  *      California, Berkeley and its contributors.
  45  * 4. Neither the name of the University nor the names of its contributors
  46  *    may be used to endorse or promote products derived from this software
  47  *    without specific prior written permission.
  48  *
  49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  59  * SUCH DAMAGE.
  60  *
  61  *      @(#)spec_vnops.c        8.14 (Berkeley) 5/21/95
  62  */
  63
  64 #include <sys/param.h>
  65 #include <sys/proc_internal.h>
  66 #include <sys/kauth.h>
  67 #include <sys/systm.h>
  68 #include <sys/kernel.h>
  69 #include <sys/conf.h>
  70 #include <sys/buf_internal.h>
  71 #include <sys/mount_internal.h>
  72 #include <sys/namei.h>
  73 #include <sys/vnode_internal.h>
  74 #include <sys/stat.h>
  75 #include <sys/errno.h>
  76 #include <sys/ioctl.h>
  77 #include <sys/file.h>
  78 #include <sys/user.h>
  79 #include <sys/malloc.h>
  80 #include <sys/disk.h>
  81 #include <sys/uio_internal.h>
  82 #include <miscfs/specfs/specdev.h>
  83 #include <vfs/vfs_support.h>
  84
  85 #include <sys/kdebug.h>
  86
/*
 * Array of SPECHSZ vnode pointers.
 * NOTE(review): presumably the heads of the hash chains used to find
 * special-device vnodes -- confirm against specdev.h.
 */
struct vnode *speclisth[SPECHSZ];

/*
 * Symbolic sleep message strings for devices.  Each array simply holds
 * its own name as a NUL-terminated string.
 */
char    devopn[] = "devopn";
char    devio[] = "devio";
char    devwait[] = "devwait";
char    devin[] = "devin";
char    devout[] = "devout";
char    devioc[] = "devioc";
char    devcls[] = "devcls";

  97
  98 #define VOPFUNC int (*)(void *)
  99
 100 int (**spec_vnodeop_p)(void *);
 101 struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
 102         { &vnop_default_desc, (VOPFUNC)vn_default_error },
 103         { &vnop_lookup_desc, (VOPFUNC)spec_lookup },            /* lookup */
 104         { &vnop_create_desc, (VOPFUNC)err_create },             /* create */
 105         { &vnop_mknod_desc, (VOPFUNC)err_mknod },               /* mknod */
 106         { &vnop_open_desc, (VOPFUNC)spec_open },                        /* open */
 107         { &vnop_close_desc, (VOPFUNC)spec_close },              /* close */
 108         { &vnop_access_desc, (VOPFUNC)spec_access },            /* access */
 109         { &vnop_getattr_desc, (VOPFUNC)spec_getattr },          /* getattr */
 110         { &vnop_setattr_desc, (VOPFUNC)spec_setattr },          /* setattr */
 111         { &vnop_read_desc, (VOPFUNC)spec_read },                        /* read */
 112         { &vnop_write_desc, (VOPFUNC)spec_write },              /* write */
 113         { &vnop_ioctl_desc, (VOPFUNC)spec_ioctl },              /* ioctl */
 114         { &vnop_select_desc, (VOPFUNC)spec_select },            /* select */
 115         { &vnop_revoke_desc, (VOPFUNC)nop_revoke },             /* revoke */
 116         { &vnop_mmap_desc, (VOPFUNC)err_mmap },                 /* mmap */
 117         { &vnop_fsync_desc, (VOPFUNC)spec_fsync },              /* fsync */
 118         { &vnop_remove_desc, (VOPFUNC)err_remove },             /* remove */
 119         { &vnop_link_desc, (VOPFUNC)err_link },                 /* link */
 120         { &vnop_rename_desc, (VOPFUNC)err_rename },             /* rename */
 121         { &vnop_mkdir_desc, (VOPFUNC)err_mkdir },               /* mkdir */
 122         { &vnop_rmdir_desc, (VOPFUNC)err_rmdir },               /* rmdir */
 123         { &vnop_symlink_desc, (VOPFUNC)err_symlink },           /* symlink */
 124         { &vnop_readdir_desc, (VOPFUNC)err_readdir },           /* readdir */
 125         { &vnop_readlink_desc, (VOPFUNC)err_readlink },         /* readlink */
 126         { &vnop_inactive_desc, (VOPFUNC)nop_inactive },         /* inactive */
 127         { &vnop_reclaim_desc, (VOPFUNC)nop_reclaim },           /* reclaim */
 128         { &vnop_strategy_desc, (VOPFUNC)spec_strategy },                /* strategy */
 129         { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf },                /* pathconf */
 130         { &vnop_advlock_desc, (VOPFUNC)err_advlock },           /* advlock */
 131         { &vnop_bwrite_desc, (VOPFUNC)spec_bwrite },            /* bwrite */
 132         { &vnop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */
 133         { &vnop_pagein_desc, (VOPFUNC)err_pagein },             /* Pagein */
 134         { &vnop_pageout_desc, (VOPFUNC)err_pageout },           /* Pageout */
 135         { &vnop_copyfile_desc, (VOPFUNC)err_copyfile },         /* Copyfile */
 136         { &vnop_blktooff_desc, (VOPFUNC)spec_blktooff },                /* blktooff */
 137         { &vnop_offtoblk_desc, (VOPFUNC)spec_offtoblk },                /* offtoblk */
 138         { &vnop_blockmap_desc, (VOPFUNC)spec_blockmap },                /* blockmap */
 139         { (struct vnodeop_desc*)NULL, (int(*)())NULL }
 140 };
 141 struct vnodeopv_desc spec_vnodeop_opv_desc =
 142         { &spec_vnodeop_p, spec_vnodeop_entries };
 143
 144
 145 static void set_blocksize(vnode_t, dev_t);
 146
 147
 148 /*
 149  * Trivial lookup routine that always fails.
 150  */
 151 int
 152 spec_lookup(ap)
 153         struct vnop_lookup_args /* {
 154                 struct vnode *a_dvp;
 155                 struct vnode **a_vpp;
 156                 struct componentname *a_cnp;
 157                 vfs_context_t a_context;
 158         } */ *ap;
 159 {
 160
 161         *ap->a_vpp = NULL;
 162         return (ENOTDIR);
 163 }
 164
 165 static void
 166 set_blocksize(struct vnode *vp, dev_t dev)
 167 {
 168     int (*size)(dev_t);
 169     int rsize;
 170
 171     if ((major(dev) < nblkdev) && (size = bdevsw[major(dev)].d_psize)) {
 172         rsize = (*size)(dev);
 173         if (rsize <= 0)        /* did size fail? */
 174             vp->v_specsize = DEV_BSIZE;
 175         else
 176             vp->v_specsize = rsize;
 177     }
 178     else
 179             vp->v_specsize = DEV_BSIZE;
 180 }
 181
 182 void
 183 set_fsblocksize(struct vnode *vp)
 184 {
 185
 186         if (vp->v_type == VBLK) {
 187                 dev_t dev = (dev_t)vp->v_rdev;
 188                 int maj = major(dev);
 189
 190                 if ((u_int)maj >= (u_int)nblkdev)
 191                         return;
 192
 193                 vnode_lock(vp);
 194                 set_blocksize(vp, dev);
 195                 vnode_unlock(vp);
 196         }
 197
 198 }
 199
 200
 201 /*
 202  * Open a special file.
 203  */
 204 int
 205 spec_open(ap)
 206         struct vnop_open_args /* {
 207                 struct vnode *a_vp;
 208                 int  a_mode;
 209                 vfs_context_t a_context;
 210         } */ *ap;
 211 {
 212         struct proc *p = vfs_context_proc(ap->a_context);
 213         kauth_cred_t cred = vfs_context_ucred(ap->a_context);
 214         struct vnode *vp = ap->a_vp;
 215         dev_t bdev, dev = (dev_t)vp->v_rdev;
 216         int maj = major(dev);
 217         int error;
 218
 219         /*
 220          * Don't allow open if fs is mounted -nodev.
 221          */
 222         if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
 223                 return (ENXIO);
 224
 225         switch (vp->v_type) {
 226
 227         case VCHR:
 228                 if ((u_int)maj >= (u_int)nchrdev)
 229                         return (ENXIO);
 230                 if (cred != FSCRED && (ap->a_mode & FWRITE)) {
 231                         /*
 232                          * When running in very secure mode, do not allow
 233                          * opens for writing of any disk character devices.
 234                          */
 235                         if (securelevel >= 2 && isdisk(dev, VCHR))
 236                                 return (EPERM);
 237                         /*
 238                          * When running in secure mode, do not allow opens
 239                          * for writing of /dev/mem, /dev/kmem, or character
 240                          * devices whose corresponding block devices are
 241                          * currently mounted.
 242                          */
 243                         if (securelevel >= 1) {
 244                                 if ((bdev = chrtoblk(dev)) != NODEV && check_mountedon(bdev, VBLK, &error))
 245                                         return (error);
 246                                 if (iskmemdev(dev))
 247                                         return (EPERM);
 248                         }
 249                 }
 250                 if (cdevsw[maj].d_type == D_TTY) {
 251                         vnode_lock(vp);
 252                         vp->v_flag |= VISTTY;
 253                         vnode_unlock(vp);
 254                 }
 255                 error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p);
 256                 return (error);
 257
 258         case VBLK:
 259                 if ((u_int)maj >= (u_int)nblkdev)
 260                         return (ENXIO);
 261                 /*
 262                  * When running in very secure mode, do not allow
 263                  * opens for writing of any disk block devices.
 264                  */
 265                 if (securelevel >= 2 && cred != FSCRED &&
 266                     (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK)
 267                         return (EPERM);
 268                 /*
 269                  * Do not allow opens of block devices that are
 270                  * currently mounted.
 271                  */
 272                 if ( (error = vfs_mountedon(vp)) )
 273                         return (error);
 274                 error = (*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p);
 275                 if (!error) {
 276                     u_int64_t blkcnt;
 277                     u_int32_t blksize;
 278                         int setsize = 0;
 279                         u_int32_t size512 = 512;
 280
 281
 282                     if (!VNOP_IOCTL(vp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, ap->a_context)) {
 283                                 /* Switch to 512 byte sectors (temporarily) */
 284
 285                                 if (!VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, ap->a_context)) {
 286                                 /* Get the number of 512 byte physical blocks. */
 287                                 if (!VNOP_IOCTL(vp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, ap->a_context)) {
 288                                                 setsize = 1;
 289                                 }
 290                                 }
 291                                 /* If it doesn't set back, we can't recover */
 292                                 if (VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, ap->a_context))
 293                                 error = ENXIO;
 294                     }
 295
 296
 297                         vnode_lock(vp);
 298                     set_blocksize(vp, dev);
 299
 300                     /*
 301                      * Cache the size in bytes of the block device for later
 302                      * use by spec_write().
 303                      */
 304                         if (setsize)
 305                                 vp->v_specdevsize = blkcnt * (u_int64_t)size512;
 306                         else
 307                         vp->v_specdevsize = (u_int64_t)0;       /* Default: Can't get */
 308
 309                         vnode_unlock(vp);
 310
 311                 }
 312                 return(error);
 313         default:
 314                 panic("spec_open type");
 315         }
 316         return (0);
 317 }
 318
 319 /*
 320  * Vnode op for read
 321  */
 322 int
 323 spec_read(ap)
 324         struct vnop_read_args /* {
 325                 struct vnode *a_vp;
 326                 struct uio *a_uio;
 327                 int  a_ioflag;
 328                 vfs_context_t a_context;
 329         } */ *ap;
 330 {
 331         register struct vnode *vp = ap->a_vp;
 332         register struct uio *uio = ap->a_uio;
 333         struct buf *bp;
 334         daddr64_t bn, nextbn;
 335         long bsize, bscale;
 336         int devBlockSize=0;
 337         int n, on;
 338         int error = 0;
 339         dev_t dev;
 340
 341 #if DIAGNOSTIC
 342         if (uio->uio_rw != UIO_READ)
 343                 panic("spec_read mode");
 344         if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg))
 345                 panic("spec_read proc");
 346 #endif
 347         if (uio_resid(uio) == 0)
 348                 return (0);
 349
 350         switch (vp->v_type) {
 351
 352         case VCHR:
 353                 error = (*cdevsw[major(vp->v_rdev)].d_read)
 354                         (vp->v_rdev, uio, ap->a_ioflag);
 355                 return (error);
 356
 357         case VBLK:
 358                 if (uio->uio_offset < 0)
 359                         return (EINVAL);
 360
 361                 dev = vp->v_rdev;
 362
 363                 devBlockSize = vp->v_specsize;
 364
 365                 if (devBlockSize > PAGE_SIZE)
 366                         return (EINVAL);
 367
 368                 bscale = PAGE_SIZE / devBlockSize;
 369                 bsize = bscale * devBlockSize;
 370
 371                 do {
 372                         on = uio->uio_offset % bsize;
 373
 374                         bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ (bscale - 1));
 375
 376                         if (vp->v_speclastr + bscale == bn) {
 377                                 nextbn = bn + bscale;
 378                                 error = buf_breadn(vp, bn, (int)bsize, &nextbn,
 379                                                (int *)&bsize, 1, NOCRED, &bp);
 380                         } else
 381                                 error = buf_bread(vp, bn, (int)bsize, NOCRED, &bp);
 382
 383                         vnode_lock(vp);
 384                         vp->v_speclastr = bn;
 385                         vnode_unlock(vp);
 386
 387                         n = bsize - buf_resid(bp);
 388                         if ((on > n) || error) {
 389                                 if (!error)
 390                                         error = EINVAL;
 391                                 buf_brelse(bp);
 392                                 return (error);
 393                         }
 394                         // LP64todo - fix this!
 395                         n = min((unsigned)(n  - on), uio_resid(uio));
 396
 397                         error = uiomove((char *)buf_dataptr(bp) + on, n, uio);
 398                         if (n + on == bsize)
 399                                 buf_markaged(bp);
 400                         buf_brelse(bp);
 401                 } while (error == 0 && uio_resid(uio) > 0 && n != 0);
 402                 return (error);
 403
 404         default:
 405                 panic("spec_read type");
 406         }
 407         /* NOTREACHED */
 408
 409         return (0);
 410 }
 411
 412 /*
 413  * Vnode op for write
 414  */
 415 int
 416 spec_write(ap)
 417         struct vnop_write_args /* {
 418                 struct vnode *a_vp;
 419                 struct uio *a_uio;
 420                 int  a_ioflag;
 421                 vfs_context_t a_context;
 422         } */ *ap;
 423 {
 424         register struct vnode *vp = ap->a_vp;
 425         register struct uio *uio = ap->a_uio;
 426         struct buf *bp;
 427         daddr64_t bn;
 428         int bsize, blkmask, bscale;
 429         register int io_sync;
 430         register int io_size;
 431         int devBlockSize=0;
 432         register int n, on;
 433         int error = 0;
 434         dev_t dev;
 435
 436 #if DIAGNOSTIC
 437         if (uio->uio_rw != UIO_WRITE)
 438                 panic("spec_write mode");
 439         if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg))
 440                 panic("spec_write proc");
 441 #endif
 442
 443         switch (vp->v_type) {
 444
 445         case VCHR:
 446                 error = (*cdevsw[major(vp->v_rdev)].d_write)
 447                         (vp->v_rdev, uio, ap->a_ioflag);
 448                 return (error);
 449
 450         case VBLK:
 451                 if (uio_resid(uio) == 0)
 452                         return (0);
 453                 if (uio->uio_offset < 0)
 454                         return (EINVAL);
 455
 456                 io_sync = (ap->a_ioflag & IO_SYNC);
 457                 // LP64todo - fix this!
 458                 io_size = uio_resid(uio);
 459
 460                 dev = (vp->v_rdev);
 461
 462                 devBlockSize = vp->v_specsize;
 463                 if (devBlockSize > PAGE_SIZE)
 464                         return(EINVAL);
 465
 466                 bscale = PAGE_SIZE / devBlockSize;
 467                 blkmask = bscale - 1;
 468                 bsize = bscale * devBlockSize;
 469
 470
 471                 do {
 472                         bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ blkmask);
 473                         on = uio->uio_offset % bsize;
 474
 475                         // LP64todo - fix this!
 476                         n = min((unsigned)(bsize - on), uio_resid(uio));
 477
 478                         /*
 479                          * Use buf_getblk() as an optimization IFF:
 480                          *
 481                          * 1)   We are reading exactly a block on a block
 482                          *      aligned boundary
 483                          * 2)   We know the size of the device from spec_open
 484                          * 3)   The read doesn't span the end of the device
 485                          *
 486                          * Otherwise, we fall back on buf_bread().
 487                          */
 488                         if (n == bsize &&
 489                             vp->v_specdevsize != (u_int64_t)0 &&
 490                             (uio->uio_offset + (u_int64_t)n) > vp->v_specdevsize) {
 491                             /* reduce the size of the read to what is there */
 492                             n = (uio->uio_offset + (u_int64_t)n) - vp->v_specdevsize;
 493                         }
 494
 495                         if (n == bsize)
 496                                 bp = buf_getblk(vp, bn, bsize, 0, 0, BLK_WRITE);
 497                         else
 498                                 error = (int)buf_bread(vp, bn, bsize, NOCRED, &bp);
 499
 500                         /* Translate downstream error for upstream, if needed */
 501                         if (!error)
 502                                 error = (int)buf_error(bp);
 503                         if (error) {
 504                                 buf_brelse(bp);
 505                                 return (error);
 506                         }
 507                         n = min(n, bsize - buf_resid(bp));
 508
 509                         error = uiomove((char *)buf_dataptr(bp) + on, n, uio);
 510                         if (error) {
 511                                 buf_brelse(bp);
 512                                 return (error);
 513                         }
 514                         buf_markaged(bp);
 515
 516                         if (io_sync)
 517                                 error = buf_bwrite(bp);
 518                         else {
 519                                 if ((n + on) == bsize)
 520                                         error = buf_bawrite(bp);
 521                                 else
 522                                         error = buf_bdwrite(bp);
 523                         }
 524                 } while (error == 0 && uio_resid(uio) > 0 && n != 0);
 525                 return (error);
 526
 527         default:
 528                 panic("spec_write type");
 529         }
 530         /* NOTREACHED */
 531
 532         return (0);
 533 }
 534
 535 /*
 536  * Device ioctl operation.
 537  */
 538 int
 539 spec_ioctl(ap)
 540         struct vnop_ioctl_args /* {
 541                 struct vnode *a_vp;
 542                 int  a_command;
 543                 caddr_t  a_data;
 544                 int  a_fflag;
 545                 vfs_context_t a_context;
 546         } */ *ap;
 547 {
 548         proc_t p = vfs_context_proc(ap->a_context);
 549         dev_t dev = ap->a_vp->v_rdev;
 550
 551         switch (ap->a_vp->v_type) {
 552
 553         case VCHR:
 554                 return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
 555                     ap->a_fflag, p));
 556
 557         case VBLK:
 558                 if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) {
 559                         if (bdevsw[major(dev)].d_type == D_TAPE)
 560                                 return (0);
 561                         else
 562                                 return (1);
 563                 }
 564                 return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
 565                    ap->a_fflag, p));
 566
 567         default:
 568                 panic("spec_ioctl");
 569                 /* NOTREACHED */
 570         }
 571         return (0);
 572 }
 573
 574 int
 575 spec_select(ap)
 576         struct vnop_select_args /* {
 577                 struct vnode *a_vp;
 578                 int  a_which;
 579                 int  a_fflags;
 580                 void * a_wql;
 581                 vfs_context_t a_context;
 582         } */ *ap;
 583 {
 584         proc_t p = vfs_context_proc(ap->a_context);
 585         register dev_t dev;
 586
 587         switch (ap->a_vp->v_type) {
 588
 589         default:
 590                 return (1);             /* XXX */
 591
 592         case VCHR:
 593                 dev = ap->a_vp->v_rdev;
 594                 return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_wql, p);
 595         }
 596 }
 597
 598 /*
 599  * Synch buffers associated with a block device
 600  */
 601 int
 602 spec_fsync_internal(vnode_t vp, int waitfor, __unused vfs_context_t context)
 603 {
 604         if (vp->v_type == VCHR)
 605                 return (0);
 606         /*
 607          * Flush all dirty buffers associated with a block device.
 608          */
 609         buf_flushdirtyblks(vp, waitfor == MNT_WAIT, 0, (char *)"spec_fsync");
 610
 611         return (0);
 612 }
 613
 614 int
 615 spec_fsync(ap)
 616         struct vnop_fsync_args /* {
 617                 struct vnode *a_vp;
 618                 int  a_waitfor;
 619                 vfs_context_t a_context;
 620         } */ *ap;
 621 {
 622         return spec_fsync_internal(ap->a_vp, ap->a_waitfor, ap->a_context);
 623 }
 624
 625 /*
 626  * Just call the device strategy routine
 627  */
 628 extern int hard_throttle_on_root;
 629
 630
 631 #define LOWPRI_DELAY_MSECS      200
 632 #define LOWPRI_WINDOW_MSECS     200
 633
 634 int     lowpri_IO_window_msecs = LOWPRI_WINDOW_MSECS;
 635 int     lowpri_IO_delay_msecs  = LOWPRI_DELAY_MSECS;
 636
 637 struct timeval last_normal_IO_timestamp;
 638 struct timeval last_lowpri_IO_timestamp;
 639 struct timeval lowpri_IO_window = { 0, LOWPRI_WINDOW_MSECS * 1000 };
 640
 641 int
 642 spec_strategy(ap)
 643         struct vnop_strategy_args /* {
 644                 struct buf *a_bp;
 645         } */ *ap;
 646 {
 647         buf_t   bp;
 648         int     bflags;
 649         dev_t   bdev;
 650         proc_t  p;
 651         struct timeval elapsed;
 652
 653         bp = ap->a_bp;
 654         bdev = buf_device(bp);
 655         bflags = buf_flags(bp);
 656
 657         if (kdebug_enable) {
 658                 int    code = 0;
 659
 660                 if (bflags & B_READ)
 661                         code |= DKIO_READ;
 662                 if (bflags & B_ASYNC)
 663                         code |= DKIO_ASYNC;
 664
 665                 if (bflags & B_META)
 666                         code |= DKIO_META;
 667                 else if (bflags & B_PAGEIO)
 668                         code |= DKIO_PAGING;
 669
 670                 KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE,
 671                                       (unsigned int)bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0);
 672         }
 673         if (((bflags & (B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) &&
 674             (buf_vnode(bp)->v_mount->mnt_kern_flag & MNTK_ROOTDEV))
 675                 hard_throttle_on_root = 1;
 676
 677         if ( lowpri_IO_delay_msecs && lowpri_IO_window_msecs ) {
 678                 p = current_proc();
 679
 680                 if ( (p == NULL) || !(p->p_lflag & P_LLOW_PRI_IO)) {
 681                         if (!(p->p_lflag & P_LBACKGROUND_IO))
 682                                 microuptime(&last_normal_IO_timestamp);
 683                 } else {
 684                         microuptime(&last_lowpri_IO_timestamp);
 685
 686                         elapsed = last_lowpri_IO_timestamp;
 687                         timevalsub(&elapsed, &last_normal_IO_timestamp);
 688
 689                         lowpri_IO_window.tv_sec  = lowpri_IO_window_msecs / 1000;
 690                         lowpri_IO_window.tv_usec = (lowpri_IO_window_msecs % 1000) * 1000;
 691
 692                         if (timevalcmp(&elapsed, &lowpri_IO_window, <)) {
 693                                 struct uthread  *ut;
 694
 695                                 /*
 696                                  * I'd really like to do the IOSleep here, but
 697                                  * we may be holding all kinds of filesystem related locks
 698                                  * and the pages for this I/O marked 'busy'...
 699                                  * we don't want to cause a normal task to block on
 700                                  * one of these locks while we're throttling a task marked
 701                                  * for low priority I/O... we'll mark the uthread and
 702                                  * do the delay just before we return from the system
 703                                  * call that triggered this I/O or from vnode_pagein
 704                                  */
 705                                 ut = get_bsdthread_info(current_thread());
 706                                 ut->uu_lowpri_delay = lowpri_IO_delay_msecs;
 707                         }
 708                 }
 709         }
 710         (*bdevsw[major(bdev)].d_strategy)(bp);
 711
 712         return (0);
 713 }
 714
 715
 716 /*
 717  * This is a noop, simply returning what one has been given.
 718  */
 719 int
 720 spec_blockmap(__unused struct vnop_blockmap_args *ap)
 721 {
 722         return (ENOTSUP);
 723 }
 724
 725
 726 /*
 727  * Device close routine
 728  */
 729 int
 730 spec_close(ap)
 731         struct vnop_close_args /* {
 732                 struct vnode *a_vp;
 733                 int  a_fflag;
 734                 vfs_context_t a_context;
 735         } */ *ap;
 736 {
 737         register struct vnode *vp = ap->a_vp;
 738         dev_t dev = vp->v_rdev;
 739         int (*devclose)(dev_t, int, int, struct proc *);
 740         int mode, error;
 741         struct proc *p = vfs_context_proc(ap->a_context);
 742
 743         switch (vp->v_type) {
 744
 745         case VCHR:
 746                 /*
 747                  * Hack: a tty device that is a controlling terminal
 748                  * has a reference from the session structure.
 749                  * We cannot easily tell that a character device is
 750                  * a controlling terminal, unless it is the closing
 751                  * process' controlling terminal.  In that case,
 752                  * if the reference count is 2 (this last descriptor
 753                  * plus the session), release the reference from the session.
 754                  */
 755                 if (vcount(vp) == 2 && p &&
 756                     vp == p->p_session->s_ttyvp) {
 757                         p->p_session->s_ttyvp = NULL;
 758                         vnode_rele(vp);
 759                 }
 760                 /*
 761                  * close on last reference.
 762                  */
 763                 if (vcount(vp) > 1)
 764                         return (0);
 765                 devclose = cdevsw[major(dev)].d_close;
 766                 mode = S_IFCHR;
 767                 break;
 768
 769         case VBLK:
 770 #ifdef DEVFS_IMPLEMENTS_LOCKING
 771                 /*
 772                  * On last close of a block device (that isn't mounted)
 773                  * we must invalidate any in core blocks, so that
 774                  * we can, for instance, change floppy disks.
 775                  */
 776                 if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context)))
 777                         return (error);
 778
 779                 error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
 780                 if (error)
 781                         return (error);
 782                 /*
 783                  * Since every use (buffer, vnode, swap, blockmap)
 784                  * holds a reference to the vnode, and because we mark
 785                  * any other vnodes that alias this device, when the
 786                  * sum of the reference counts on all the aliased
 787                  * vnodes descends to one, we are on last close.
 788                  */
 789                 if (vcount(vp) > 1)
 790                         return (0);
 791 #else /* DEVFS_IMPLEMENTS_LOCKING */
 792                 /*
 793                  * Since every use (buffer, vnode, swap, blockmap)
 794                  * holds a reference to the vnode, and because we mark
 795                  * any other vnodes that alias this device, when the
 796                  * sum of the reference counts on all the aliased
 797                  * vnodes descends to one, we are on last close.
 798                  */
 799                 if (vcount(vp) > 1)
 800                         return (0);
 801
 802                 /*
 803                  * On last close of a block device (that isn't mounted)
 804                  * we must invalidate any in core blocks, so that
 805                  * we can, for instance, change floppy disks.
 806                  */
 807                 if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context)))
 808                         return (error);
 809
 810                 error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
 811                 if (error)
 812                         return (error);
 813 #endif /* DEVFS_IMPLEMENTS_LOCKING */
 814                 devclose = bdevsw[major(dev)].d_close;
 815                 mode = S_IFBLK;
 816                 break;
 817
 818         default:
 819                 panic("spec_close: not special");
 820         }
 821
 822         return ((*devclose)(dev, ap->a_fflag, mode, p));
 823 }
 824
 825 /*
 826  * Return POSIX pathconf information applicable to special devices.
 827  */
 828 int
 829 spec_pathconf(ap)
 830         struct vnop_pathconf_args /* {
 831                 struct vnode *a_vp;
 832                 int a_name;
 833                 int *a_retval;
 834                 vfs_context_t a_context;
 835         } */ *ap;
 836 {
 837
 838         switch (ap->a_name) {
 839         case _PC_LINK_MAX:
 840                 *ap->a_retval = LINK_MAX;
 841                 return (0);
 842         case _PC_MAX_CANON:
 843                 *ap->a_retval = MAX_CANON;
 844                 return (0);
 845         case _PC_MAX_INPUT:
 846                 *ap->a_retval = MAX_INPUT;
 847                 return (0);
 848         case _PC_PIPE_BUF:
 849                 *ap->a_retval = PIPE_BUF;
 850                 return (0);
 851         case _PC_CHOWN_RESTRICTED:
 852                 *ap->a_retval = 1;
 853                 return (0);
 854         case _PC_VDISABLE:
 855                 *ap->a_retval = _POSIX_VDISABLE;
 856                 return (0);
 857         default:
 858                 return (EINVAL);
 859         }
 860         /* NOTREACHED */
 861 }
 862
 863 int
 864 spec_devblocksize(ap)
 865         struct vnop_devblocksize_args /* {
 866                 struct vnode *a_vp;
 867                 int *a_retval;
 868         } */ *ap;
 869 {
 870         *ap->a_retval = (ap->a_vp->v_specsize);
 871         return (0);
 872 }
 873
 874 /*
 875  * Special device failed operation
 876  */
 877 int
 878 spec_ebadf(__unused void *dummy)
 879 {
 880
 881         return (EBADF);
 882 }
 883
 884 /*
 885  * Special device bad operation
 886  */
 887 int
 888 spec_badop()
 889 {
 890
 891         panic("spec_badop called");
 892         /* NOTREACHED */
 893 }
 894
 895 /* Blktooff derives file offset from logical block number */
 896 int
 897 spec_blktooff(ap)
 898         struct vnop_blktooff_args /* {
 899                 struct vnode *a_vp;
 900                 daddr64_t a_lblkno;
 901                 off_t *a_offset;
 902         } */ *ap;
 903 {
 904         register struct vnode *vp = ap->a_vp;
 905
 906         switch (vp->v_type) {
 907         case VCHR:
 908                 *ap->a_offset = (off_t)-1; /* failure */
 909                 return (ENOTSUP);
 910
 911         case VBLK:
 912                 printf("spec_blktooff: not implemented for VBLK\n");
 913                 *ap->a_offset = (off_t)-1; /* failure */
 914                 return (ENOTSUP);
 915
 916         default:
 917                 panic("spec_blktooff type");
 918         }
 919         /* NOTREACHED */
 920
 921         return (0);
 922 }
 923
 924 /* Offtoblk derives logical block number from file offset */
 925 int
 926 spec_offtoblk(ap)
 927         struct vnop_offtoblk_args /* {
 928                 struct vnode *a_vp;
 929                 off_t a_offset;
 930                 daddr64_t *a_lblkno;
 931         } */ *ap;
 932 {
 933         register struct vnode *vp = ap->a_vp;
 934
 935         switch (vp->v_type) {
 936         case VCHR:
 937                 *ap->a_lblkno = (daddr64_t)-1; /* failure */
 938                 return (ENOTSUP);
 939
 940         case VBLK:
 941                 printf("spec_offtoblk: not implemented for VBLK\n");
 942                 *ap->a_lblkno = (daddr64_t)-1; /* failure */
 943                 return (ENOTSUP);
 944
 945         default:
 946                 panic("spec_offtoblk type");
 947         }
 948         /* NOTREACHED */
 949
 950         return (0);
 951 }