X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/fa4905b191e0d16b0fffd53bd565eca71d01fae0..4452a7af2eac33dbad800bcc91f2399d62c18f53:/bsd/miscfs/specfs/spec_vnops.c diff --git a/bsd/miscfs/specfs/spec_vnops.c b/bsd/miscfs/specfs/spec_vnops.c index 81b836085..1c88c6a22 100644 --- a/bsd/miscfs/specfs/spec_vnops.c +++ b/bsd/miscfs/specfs/spec_vnops.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ /* @@ -56,23 +62,27 @@ */ #include -#include +#include +#include #include #include #include -#include -#include +#include +#include #include -#include +#include #include #include #include #include +#include #include -#include +#include +#include #include #include +#include struct vnode *speclisth[SPECHSZ]; @@ -89,70 +99,61 @@ char devcls[] = "devcls"; int (**spec_vnodeop_p)(void *); struct vnodeopv_entry_desc spec_vnodeop_entries[] = { - { &vop_default_desc, (VOPFUNC)vn_default_error }, - { &vop_lookup_desc, (VOPFUNC)spec_lookup }, /* lookup */ - { &vop_create_desc, (VOPFUNC)err_create }, /* create */ - { &vop_mknod_desc, (VOPFUNC)err_mknod }, /* mknod */ - { &vop_open_desc, (VOPFUNC)spec_open }, /* open */ - { &vop_close_desc, (VOPFUNC)spec_close }, /* close */ - { &vop_access_desc, (VOPFUNC)spec_access }, /* access */ - { &vop_getattr_desc, (VOPFUNC)spec_getattr }, /* getattr */ - { &vop_setattr_desc, (VOPFUNC)spec_setattr }, /* setattr */ - { &vop_read_desc, (VOPFUNC)spec_read }, /* read */ - { &vop_write_desc, (VOPFUNC)spec_write }, /* write */ - { &vop_lease_desc, (VOPFUNC)nop_lease }, /* lease */ - { &vop_ioctl_desc, (VOPFUNC)spec_ioctl }, /* ioctl */ - { &vop_select_desc, (VOPFUNC)spec_select }, /* select */ - { &vop_revoke_desc, (VOPFUNC)nop_revoke }, /* revoke */ - { &vop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ - { &vop_fsync_desc, (VOPFUNC)spec_fsync }, /* fsync */ - { &vop_seek_desc, (VOPFUNC)err_seek }, /* seek */ - { &vop_remove_desc, (VOPFUNC)err_remove }, /* remove */ - { &vop_link_desc, (VOPFUNC)err_link }, /* link */ - { &vop_rename_desc, (VOPFUNC)err_rename }, /* rename */ - { &vop_mkdir_desc, (VOPFUNC)err_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (VOPFUNC)err_rmdir }, /* rmdir */ - { &vop_symlink_desc, (VOPFUNC)err_symlink }, /* symlink */ - { &vop_readdir_desc, (VOPFUNC)err_readdir }, /* readdir */ - { &vop_readlink_desc, (VOPFUNC)err_readlink }, /* readlink */ - { &vop_abortop_desc, (VOPFUNC)err_abortop }, /* abortop */ - { &vop_inactive_desc, (VOPFUNC)nop_inactive }, /* inactive */ - { &vop_reclaim_desc, (VOPFUNC)nop_reclaim }, /* reclaim */ - { &vop_lock_desc, (VOPFUNC)nop_lock }, /* lock */ - { &vop_unlock_desc, (VOPFUNC)nop_unlock }, /* unlock */ - { &vop_bmap_desc, (VOPFUNC)spec_bmap }, /* bmap */ - { &vop_strategy_desc, (VOPFUNC)spec_strategy }, /* strategy */ - { &vop_print_desc, (VOPFUNC)spec_print }, /* print */ - { &vop_islocked_desc, (VOPFUNC)nop_islocked }, /* islocked */ - { &vop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ - { &vop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ - { &vop_blkatoff_desc, (VOPFUNC)err_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (VOPFUNC)err_valloc }, /* valloc */ - { &vop_vfree_desc, (VOPFUNC)err_vfree }, /* vfree */ - { &vop_truncate_desc, (VOPFUNC)nop_truncate }, /* truncate */ - { &vop_update_desc, (VOPFUNC)nop_update }, /* update */ - { &vop_bwrite_desc, (VOPFUNC)spec_bwrite }, /* bwrite */ - { &vop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ - { &vop_pagein_desc, (VOPFUNC)err_pagein }, /* Pagein */ - { &vop_pageout_desc, (VOPFUNC)err_pageout }, /* Pageout */ - { &vop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ - { &vop_blktooff_desc, (VOPFUNC)spec_blktooff }, /* blktooff */ - { &vop_offtoblk_desc, (VOPFUNC)spec_offtoblk }, /* offtoblk */ - { &vop_cmap_desc, (VOPFUNC)spec_cmap }, /* cmap */ + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)spec_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)err_create }, /* create */ + { &vnop_mknod_desc, (VOPFUNC)err_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)spec_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)spec_close }, /* close */ + { &vnop_access_desc, (VOPFUNC)spec_access }, /* access */ + { &vnop_getattr_desc, (VOPFUNC)spec_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)spec_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)spec_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)spec_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)spec_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)spec_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)nop_revoke }, /* revoke */ + { &vnop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)spec_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)err_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)err_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)err_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)err_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)err_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)err_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)err_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)err_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)nop_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)nop_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)spec_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)spec_bwrite }, /* bwrite */ + { &vnop_pagein_desc, (VOPFUNC)err_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)err_pageout }, /* Pageout */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ + { &vnop_blktooff_desc, (VOPFUNC)spec_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)spec_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)spec_blockmap }, /* blockmap */ { (struct vnodeop_desc*)NULL, (int(*)())NULL } }; struct vnodeopv_desc spec_vnodeop_opv_desc = { &spec_vnodeop_p, spec_vnodeop_entries }; + +static void set_blocksize(vnode_t, dev_t); + + /* * Trivial lookup routine that always fails. */ int spec_lookup(ap) - struct vop_lookup_args /* { + struct vnop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; + vfs_context_t a_context; } */ *ap; { @@ -160,10 +161,10 @@ spec_lookup(ap) return (ENOTDIR); } -void +static void set_blocksize(struct vnode *vp, dev_t dev) { - int (*size)(); + int (*size)(dev_t); int rsize; if ((major(dev) < nblkdev) && (size = bdevsw[major(dev)].d_psize)) { @@ -185,10 +186,12 @@ set_fsblocksize(struct vnode *vp) dev_t dev = (dev_t)vp->v_rdev; int maj = major(dev); - if ((u_int)maj >= nblkdev) + if ((u_int)maj >= (u_int)nblkdev) return; + vnode_lock(vp); set_blocksize(vp, dev); + vnode_unlock(vp); } } @@ -197,17 +200,17 @@ set_fsblocksize(struct vnode *vp) /* * Open a special file. */ -/* ARGSUSED */ +int spec_open(ap) - struct vop_open_args /* { + struct vnop_open_args /* { struct vnode *a_vp; int a_mode; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { - struct proc *p = ap->a_p; - struct vnode *bvp, *vp = ap->a_vp; + struct proc *p = vfs_context_proc(ap->a_context); + kauth_cred_t cred = vfs_context_ucred(ap->a_context); + struct vnode *vp = ap->a_vp; dev_t bdev, dev = (dev_t)vp->v_rdev; int maj = major(dev); int error; @@ -221,9 +224,9 @@ spec_open(ap) switch (vp->v_type) { case VCHR: - if ((u_int)maj >= nchrdev) + if ((u_int)maj >= (u_int)nchrdev) return (ENXIO); - if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { + if (cred != FSCRED && (ap->a_mode & FWRITE)) { /* * When running in very secure mode, do not allow * opens for writing of any disk character devices. @@ -237,43 +240,77 @@ spec_open(ap) * currently mounted. */ if (securelevel >= 1) { - if ((bdev = chrtoblk(dev)) != NODEV && - vfinddev(bdev, VBLK, &bvp) && - bvp->v_usecount > 0 && - (error = vfs_mountedon(bvp))) + if ((bdev = chrtoblk(dev)) != NODEV && check_mountedon(bdev, VBLK, &error)) return (error); if (iskmemdev(dev)) return (EPERM); } } - if (cdevsw[maj].d_type == D_TTY) + if (cdevsw[maj].d_type == D_TTY) { + vnode_lock(vp); vp->v_flag |= VISTTY; - VOP_UNLOCK(vp, 0, p); + vnode_unlock(vp); + } error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: - if ((u_int)maj >= nblkdev) + if ((u_int)maj >= (u_int)nblkdev) return (ENXIO); /* * When running in very secure mode, do not allow * opens for writing of any disk block devices. */ - if (securelevel >= 2 && ap->a_cred != FSCRED && + if (securelevel >= 2 && cred != FSCRED && (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK) return (EPERM); /* * Do not allow opens of block devices that are * currently mounted. */ - if (error = vfs_mountedon(vp)) + if ( (error = vfs_mountedon(vp)) ) return (error); error = (*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p); if (!error) { + u_int64_t blkcnt; + u_int32_t blksize; + int setsize = 0; + u_int32_t size512 = 512; + + + if (!VNOP_IOCTL(vp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, ap->a_context)) { + /* Switch to 512 byte sectors (temporarily) */ + + if (!VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, ap->a_context)) { + /* Get the number of 512 byte physical blocks. */ + if (!VNOP_IOCTL(vp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, ap->a_context)) { + setsize = 1; + } + } + /* If it doesn't set back, we can't recover */ + if (VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, ap->a_context)) + error = ENXIO; + } + + + vnode_lock(vp); set_blocksize(vp, dev); + + /* + * Cache the size in bytes of the block device for later + * use by spec_write(). + */ + if (setsize) + vp->v_specdevsize = blkcnt * (u_int64_t)size512; + else + vp->v_specdevsize = (u_int64_t)0; /* Default: Can't get */ + + vnode_unlock(vp); + } return(error); + default: + panic("spec_open type"); } return (0); } @@ -281,42 +318,39 @@ spec_open(ap) /* * Vnode op for read */ -/* ARGSUSED */ +int spec_read(ap) - struct vop_read_args /* { + struct vnop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; - struct proc *p = uio->uio_procp; struct buf *bp; - daddr_t bn, nextbn; + daddr64_t bn, nextbn; long bsize, bscale; int devBlockSize=0; - int n, on, majordev, (*ioctl)(); + int n, on; int error = 0; dev_t dev; #if DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("spec_read mode"); - if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != current_proc()) + if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) panic("spec_read proc"); #endif - if (uio->uio_resid == 0) + if (uio_resid(uio) == 0) return (0); switch (vp->v_type) { case VCHR: - VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)].d_read) (vp->v_rdev, uio, ap->a_ioflag); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: @@ -336,55 +370,60 @@ spec_read(ap) do { on = uio->uio_offset % bsize; - bn = (uio->uio_offset / devBlockSize) &~ (bscale - 1); + bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ (bscale - 1)); - if (vp->v_lastr + bscale == bn) { + if (vp->v_speclastr + bscale == bn) { nextbn = bn + bscale; - error = breadn(vp, bn, (int)bsize, &nextbn, + error = buf_breadn(vp, bn, (int)bsize, &nextbn, (int *)&bsize, 1, NOCRED, &bp); } else - error = bread(vp, bn, (int)bsize, NOCRED, &bp); + error = buf_bread(vp, bn, (int)bsize, NOCRED, &bp); - vp->v_lastr = bn; - n = bsize - bp->b_resid; + vnode_lock(vp); + vp->v_speclastr = bn; + vnode_unlock(vp); + + n = bsize - buf_resid(bp); if ((on > n) || error) { if (!error) error = EINVAL; - brelse(bp); + buf_brelse(bp); return (error); } - n = min((unsigned)(n - on), uio->uio_resid); + // LP64todo - fix this! + n = min((unsigned)(n - on), uio_resid(uio)); - error = uiomove((char *)bp->b_data + on, n, uio); + error = uiomove((char *)buf_dataptr(bp) + on, n, uio); if (n + on == bsize) - bp->b_flags |= B_AGE; - brelse(bp); - } while (error == 0 && uio->uio_resid > 0 && n != 0); + buf_markaged(bp); + buf_brelse(bp); + } while (error == 0 && uio_resid(uio) > 0 && n != 0); return (error); default: panic("spec_read type"); } /* NOTREACHED */ + + return (0); } /* * Vnode op for write */ -/* ARGSUSED */ +int spec_write(ap) - struct vop_write_args /* { + struct vnop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; - struct proc *p = uio->uio_procp; struct buf *bp; - daddr_t bn; + daddr64_t bn; int bsize, blkmask, bscale; register int io_sync; register int io_size; @@ -396,27 +435,26 @@ spec_write(ap) #if DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("spec_write mode"); - if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != current_proc()) + if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) panic("spec_write proc"); #endif switch (vp->v_type) { case VCHR: - VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)].d_write) (vp->v_rdev, uio, ap->a_ioflag); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: - if (uio->uio_resid == 0) + if (uio_resid(uio) == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); io_sync = (ap->a_ioflag & IO_SYNC); - io_size = uio->uio_resid; + // LP64todo - fix this! + io_size = uio_resid(uio); dev = (vp->v_rdev); @@ -430,91 +468,119 @@ spec_write(ap) do { - bn = (uio->uio_offset / devBlockSize) &~ blkmask; + bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ blkmask); on = uio->uio_offset % bsize; - n = min((unsigned)(bsize - on), uio->uio_resid); + // LP64todo - fix this! + n = min((unsigned)(bsize - on), uio_resid(uio)); + + /* + * Use buf_getblk() as an optimization IFF: + * + * 1) We are reading exactly a block on a block + * aligned boundary + * 2) We know the size of the device from spec_open + * 3) The read doesn't span the end of the device + * + * Otherwise, we fall back on buf_bread(). + */ + if (n == bsize && + vp->v_specdevsize != (u_int64_t)0 && + (uio->uio_offset + (u_int64_t)n) > vp->v_specdevsize) { + /* reduce the size of the read to what is there */ + n = (uio->uio_offset + (u_int64_t)n) - vp->v_specdevsize; + } if (n == bsize) - bp = getblk(vp, bn, bsize, 0, 0, BLK_WRITE); + bp = buf_getblk(vp, bn, bsize, 0, 0, BLK_WRITE); else - error = bread(vp, bn, bsize, NOCRED, &bp); + error = (int)buf_bread(vp, bn, bsize, NOCRED, &bp); + /* Translate downstream error for upstream, if needed */ + if (!error) + error = (int)buf_error(bp); if (error) { - brelse(bp); + buf_brelse(bp); return (error); } - n = min(n, bsize - bp->b_resid); - - error = uiomove((char *)bp->b_data + on, n, uio); + n = min(n, bsize - buf_resid(bp)); - bp->b_flags |= B_AGE; + error = uiomove((char *)buf_dataptr(bp) + on, n, uio); + if (error) { + buf_brelse(bp); + return (error); + } + buf_markaged(bp); if (io_sync) - bwrite(bp); + error = buf_bwrite(bp); else { if ((n + on) == bsize) - bawrite(bp); + error = buf_bawrite(bp); else - bdwrite(bp); + error = buf_bdwrite(bp); } - } while (error == 0 && uio->uio_resid > 0 && n != 0); + } while (error == 0 && uio_resid(uio) > 0 && n != 0); return (error); default: panic("spec_write type"); } /* NOTREACHED */ + + return (0); } /* * Device ioctl operation. */ -/* ARGSUSED */ +int spec_ioctl(ap) - struct vop_ioctl_args /* { + struct vnop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { + proc_t p = vfs_context_proc(ap->a_context); dev_t dev = ap->a_vp->v_rdev; switch (ap->a_vp->v_type) { case VCHR: return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, - ap->a_fflag, ap->a_p)); + ap->a_fflag, p)); case VBLK: - if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) + if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) { if (bdevsw[major(dev)].d_type == D_TAPE) return (0); else return (1); + } return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, - ap->a_fflag, ap->a_p)); + ap->a_fflag, p)); default: panic("spec_ioctl"); /* NOTREACHED */ } + return (0); } -/* ARGSUSED */ +int spec_select(ap) - struct vop_select_args /* { + struct vnop_select_args /* { struct vnode *a_vp; int a_which; int a_fflags; - struct ucred *a_cred; void * a_wql; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { + proc_t p = vfs_context_proc(ap->a_context); register dev_t dev; switch (ap->a_vp->v_type) { @@ -524,130 +590,154 @@ spec_select(ap) case VCHR: dev = ap->a_vp->v_rdev; - return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_wql, ap->a_p); + return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_wql, p); } } + /* * Synch buffers associated with a block device */ -/* ARGSUSED */ int -spec_fsync(ap) - struct vop_fsync_args /* { - struct vnode *a_vp; - struct ucred *a_cred; - int a_waitfor; - struct proc *a_p; - } */ *ap; +spec_fsync_internal(vnode_t vp, int waitfor, __unused vfs_context_t context) { - register struct vnode *vp = ap->a_vp; - register struct buf *bp; - struct buf *nbp; - int s; - if (vp->v_type == VCHR) return (0); /* * Flush all dirty buffers associated with a block device. */ -loop: - s = splbio(); - for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { - nbp = bp->b_vnbufs.le_next; - if ((bp->b_flags & B_BUSY)) - continue; - if ((bp->b_flags & B_DELWRI) == 0) - panic("spec_fsync: not dirty"); - bremfree(bp); - bp->b_flags |= B_BUSY; - splx(s); - bawrite(bp); - goto loop; - } - if (ap->a_waitfor == MNT_WAIT) { - while (vp->v_numoutput) { - vp->v_flag |= VBWAIT; - tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spec_fsync", 0); - } -#if DIAGNOSTIC - if (vp->v_dirtyblkhd.lh_first) { - vprint("spec_fsync: dirty", vp); - splx(s); - goto loop; - } -#endif - } - splx(s); + buf_flushdirtyblks(vp, waitfor == MNT_WAIT, 0, (char *)"spec_fsync"); + return (0); } -/* - * Just call the device strategy routine - */ -spec_strategy(ap) - struct vop_strategy_args /* { - struct buf *a_bp; +int +spec_fsync(ap) + struct vnop_fsync_args /* { + struct vnode *a_vp; + int a_waitfor; + vfs_context_t a_context; } */ *ap; { - (*bdevsw[major(ap->a_bp->b_dev)].d_strategy)(ap->a_bp); - return (0); + return spec_fsync_internal(ap->a_vp, ap->a_waitfor, ap->a_context); } /* - * This is a noop, simply returning what one has been given. + * Just call the device strategy routine */ -spec_bmap(ap) - struct vop_bmap_args /* { - struct vnode *a_vp; - daddr_t a_bn; - struct vnode **a_vpp; - daddr_t *a_bnp; - int *a_runp; +extern int hard_throttle_on_root; + + +#define LOWPRI_DELAY_MSECS 200 +#define LOWPRI_WINDOW_MSECS 200 + +int lowpri_IO_window_msecs = LOWPRI_WINDOW_MSECS; +int lowpri_IO_delay_msecs = LOWPRI_DELAY_MSECS; + +struct timeval last_normal_IO_timestamp; +struct timeval last_lowpri_IO_timestamp; +struct timeval lowpri_IO_window = { 0, LOWPRI_WINDOW_MSECS * 1000 }; + +int +spec_strategy(ap) + struct vnop_strategy_args /* { + struct buf *a_bp; } */ *ap; { + buf_t bp; + int bflags; + dev_t bdev; + proc_t p; + struct timeval elapsed; + + bp = ap->a_bp; + bdev = buf_device(bp); + bflags = buf_flags(bp); + + if (kdebug_enable) { + int code = 0; + + if (bflags & B_READ) + code |= DKIO_READ; + if (bflags & B_ASYNC) + code |= DKIO_ASYNC; + + if (bflags & B_META) + code |= DKIO_META; + else if (bflags & B_PAGEIO) + code |= DKIO_PAGING; + + KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE, + (unsigned int)bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0); + } + if (((bflags & (B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) && + (buf_vnode(bp)->v_mount->mnt_kern_flag & MNTK_ROOTDEV)) + hard_throttle_on_root = 1; + + if ( lowpri_IO_delay_msecs && lowpri_IO_window_msecs ) { + p = current_proc(); + + if ( (p == NULL) || !(p->p_lflag & P_LLOW_PRI_IO)) { + if (!(p->p_lflag & P_LBACKGROUND_IO)) + microuptime(&last_normal_IO_timestamp); + } else { + microuptime(&last_lowpri_IO_timestamp); + + elapsed = last_lowpri_IO_timestamp; + timevalsub(&elapsed, &last_normal_IO_timestamp); + + lowpri_IO_window.tv_sec = lowpri_IO_window_msecs / 1000; + lowpri_IO_window.tv_usec = (lowpri_IO_window_msecs % 1000) * 1000; + + if (timevalcmp(&elapsed, &lowpri_IO_window, <)) { + struct uthread *ut; + + /* + * I'd really like to do the IOSleep here, but + * we may be holding all kinds of filesystem related locks + * and the pages for this I/O marked 'busy'... + * we don't want to cause a normal task to block on + * one of these locks while we're throttling a task marked + * for low priority I/O... we'll mark the uthread and + * do the delay just before we return from the system + * call that triggered this I/O or from vnode_pagein + */ + ut = get_bsdthread_info(current_thread()); + ut->uu_lowpri_delay = lowpri_IO_delay_msecs; + } + } + } + (*bdevsw[major(bdev)].d_strategy)(bp); - if (ap->a_vpp != NULL) - *ap->a_vpp = ap->a_vp; - if (ap->a_bnp != NULL) - *ap->a_bnp = ap->a_bn * (PAGE_SIZE / ap->a_vp->v_specsize); - if (ap->a_runp != NULL) - *ap->a_runp = (MAXPHYSIO / PAGE_SIZE) - 1; - return (0); + return (0); } + /* * This is a noop, simply returning what one has been given. */ -spec_cmap(ap) - struct vop_cmap_args /* { - struct vnode *a_vp; - off_t a_offset; - size_t a_size; - daddr_t *a_bpn; - size_t *a_run; - void *a_poff; - } */ *ap; +int +spec_blockmap(__unused struct vnop_blockmap_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } /* * Device close routine */ -/* ARGSUSED */ +int spec_close(ap) - struct vop_close_args /* { + struct vnop_close_args /* { struct vnode *a_vp; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { register struct vnode *vp = ap->a_vp; dev_t dev = vp->v_rdev; - int (*devclose) __P((dev_t, int, int, struct proc *)); + int (*devclose)(dev_t, int, int, struct proc *); int mode, error; + struct proc *p = vfs_context_proc(ap->a_context); switch (vp->v_type) { @@ -661,17 +751,15 @@ spec_close(ap) * if the reference count is 2 (this last descriptor * plus the session), release the reference from the session. */ - if (vcount(vp) == 2 && ap->a_p && - vp == ap->a_p->p_session->s_ttyvp) { - ap->a_p->p_session->s_ttyvp = NULL; - vrele(vp); + if (vcount(vp) == 2 && p && + vp == p->p_session->s_ttyvp) { + p->p_session->s_ttyvp = NULL; + vnode_rele(vp); } /* - * If the vnode is locked, then we are in the midst - * of forcably closing the device, otherwise we only * close on last reference. */ - if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) + if (vcount(vp) > 1) return (0); devclose = cdevsw[major(dev)].d_close; mode = S_IFCHR; @@ -684,33 +772,30 @@ spec_close(ap) * we must invalidate any in core blocks, so that * we can, for instance, change floppy disks. */ - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); - error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); - VOP_UNLOCK(vp, 0, ap->a_p); + if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context))) + return (error); + + error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0); if (error) return (error); /* - * We do not want to really close the device if it - * is still in use unless we are trying to close it - * forcibly. Since every use (buffer, vnode, swap, cmap) + * Since every use (buffer, vnode, swap, blockmap) * holds a reference to the vnode, and because we mark * any other vnodes that alias this device, when the * sum of the reference counts on all the aliased * vnodes descends to one, we are on last close. */ - if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) + if (vcount(vp) > 1) return (0); #else /* DEVFS_IMPLEMENTS_LOCKING */ /* - * We do not want to really close the device if it - * is still in use unless we are trying to close it - * forcibly. Since every use (buffer, vnode, swap, cmap) + * Since every use (buffer, vnode, swap, blockmap) * holds a reference to the vnode, and because we mark * any other vnodes that alias this device, when the * sum of the reference counts on all the aliased * vnodes descends to one, we are on last close. */ - if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) + if (vcount(vp) > 1) return (0); /* @@ -718,7 +803,10 @@ spec_close(ap) * we must invalidate any in core blocks, so that * we can, for instance, change floppy disks. */ - error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); + if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context))) + return (error); + + error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0); if (error) return (error); #endif /* DEVFS_IMPLEMENTS_LOCKING */ @@ -730,30 +818,19 @@ spec_close(ap) panic("spec_close: not special"); } - return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); -} - -/* - * Print out the contents of a special device vnode. - */ -spec_print(ap) - struct vop_print_args /* { - struct vnode *a_vp; - } */ *ap; -{ - - printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), - minor(ap->a_vp->v_rdev)); + return ((*devclose)(dev, ap->a_fflag, mode, p)); } /* * Return POSIX pathconf information applicable to special devices. */ +int spec_pathconf(ap) - struct vop_pathconf_args /* { + struct vnop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; + vfs_context_t a_context; } */ *ap; { @@ -782,21 +859,11 @@ spec_pathconf(ap) /* NOTREACHED */ } -int -spec_devblocksize(ap) - struct vop_devblocksize_args /* { - struct vnode *a_vp; - int *a_retval; - } */ *ap; -{ - *ap->a_retval = (ap->a_vp->v_specsize); - return (0); -} - /* * Special device failed operation */ -spec_ebadf() +int +spec_ebadf(__unused void *dummy) { return (EBADF); @@ -805,6 +872,7 @@ spec_ebadf() /* * Special device bad operation */ +int spec_badop() { @@ -815,9 +883,9 @@ spec_badop() /* Blktooff derives file offset from logical block number */ int spec_blktooff(ap) - struct vop_blktooff_args /* { + struct vnop_blktooff_args /* { struct vnode *a_vp; - daddr_t a_lblkno; + daddr64_t a_lblkno; off_t *a_offset; } */ *ap; { @@ -826,42 +894,46 @@ spec_blktooff(ap) switch (vp->v_type) { case VCHR: *ap->a_offset = (off_t)-1; /* failure */ - return (EOPNOTSUPP); + return (ENOTSUP); case VBLK: printf("spec_blktooff: not implemented for VBLK\n"); *ap->a_offset = (off_t)-1; /* failure */ - return (EOPNOTSUPP); + return (ENOTSUP); default: panic("spec_blktooff type"); } /* NOTREACHED */ + + return (0); } /* Offtoblk derives logical block number from file offset */ int spec_offtoblk(ap) - struct vop_offtoblk_args /* { + struct vnop_offtoblk_args /* { struct vnode *a_vp; off_t a_offset; - daddr_t *a_lblkno; + daddr64_t *a_lblkno; } */ *ap; { register struct vnode *vp = ap->a_vp; switch (vp->v_type) { case VCHR: - *ap->a_lblkno = (daddr_t)-1; /* failure */ - return (EOPNOTSUPP); + *ap->a_lblkno = (daddr64_t)-1; /* failure */ + return (ENOTSUP); case VBLK: printf("spec_offtoblk: not implemented for VBLK\n"); - *ap->a_lblkno = (daddr_t)-1; /* failure */ - return (EOPNOTSUPP); + *ap->a_lblkno = (daddr64_t)-1; /* failure */ + return (ENOTSUP); default: panic("spec_offtoblk type"); } /* NOTREACHED */ + + return (0); }