+
+
+/*
+ * Extended per-mount I/O constraints, hung off mp->mnt_xinfo_ptr.
+ * Valid only when MNTK_IO_XINFO is set in mp->mnt_kern_flag; allocated
+ * and filled in by vfs_init_io_attributes(), consumed by
+ * vfs_io_attributes() and vfs_io_maxsegsize().
+ */
+struct x_constraints {
+        u_int32_t x_maxreadcnt;		/* max bytes in a single read transfer */
+        u_int32_t x_maxsegreadsize;	/* max bytes in one read scatter/gather segment */
+        u_int32_t x_maxsegwritesize;	/* max bytes in one write scatter/gather segment */
+};
+
+
+/*
+ * vfs_io_attributes:
+ *	report the maximum I/O transfer size and maximum number of
+ *	scatter/gather segments supported by the mount backing 'vp' for
+ *	the given transfer direction.
+ *
+ *	vp:	 vnode whose mount point is consulted
+ *	flags:	 B_READ or B_WRITE (any other value keeps the defaults)
+ *	iosize:	 out - max transfer size in bytes
+ *	vectors: out - max number of I/O segments
+ *
+ *	Falls back to MAXPHYS / 32 vectors when no mount is attached or
+ *	when the mount reports a zero (unspecified) limit.
+ */
+void
+vfs_io_attributes(struct vnode *vp, int flags, int *iosize, int *vectors)
+{
+	struct mount *mp;
+
+	/* start with "reasonable" defaults */
+	*iosize = MAXPHYS;
+	*vectors = 32;
+
+	mp = vp->v_mount;
+	if (mp != NULL) {
+		switch (flags) {
+		case B_READ:
+			if (mp->mnt_kern_flag & MNTK_IO_XINFO)
+				/* extended info carries the full 32-bit read limit */
+				*iosize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt;
+			else
+				*iosize = mp->mnt_maxreadcnt;
+			*vectors = mp->mnt_segreadcnt;
+			break;
+		case B_WRITE:
+			*iosize = mp->mnt_maxwritecnt;
+			*vectors = mp->mnt_segwritecnt;
+			break;
+		default:
+			break;
+		}
+		/* a zero limit means "unspecified"... restore the defaults */
+		if (*iosize == 0)
+			*iosize = MAXPHYS;
+		if (*vectors == 0)
+			*vectors = 32;
+	}
+	return;
+}
+
+/*
+ * vfs_io_maxsegsize:
+ *	report the maximum scatter/gather segment size supported by the
+ *	mount backing 'vp' for the given transfer direction.
+ *
+ *	vp:	    vnode whose mount point is consulted
+ *	flags:	    B_READ or B_WRITE (any other value keeps the default)
+ *	maxsegsize: out - max segment size in bytes
+ *
+ *	Falls back to MAXPHYS when no mount is attached or the mount
+ *	reports a zero limit; when the extended x_constraints info is
+ *	absent, the max transfer size is used (the pre-xinfo behavior).
+ */
+__private_extern__
+void
+vfs_io_maxsegsize(struct vnode *vp, int flags, int *maxsegsize)
+{
+	struct mount *mp;
+
+	/* start with "reasonable" default */
+	*maxsegsize = MAXPHYS;
+
+	mp = vp->v_mount;
+	if (mp != NULL) {
+		switch (flags) {
+		case B_READ:
+			if (mp->mnt_kern_flag & MNTK_IO_XINFO)
+				*maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize;
+			else
+				/*
+				 * if the extended info doesn't exist
+				 * then use the maxread I/O size as the
+				 * max segment size... this is the previous behavior
+				 */
+				*maxsegsize = mp->mnt_maxreadcnt;
+			break;
+		case B_WRITE:
+			if (mp->mnt_kern_flag & MNTK_IO_XINFO)
+				*maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize;
+			else
+				/*
+				 * if the extended info doesn't exist
+				 * then use the maxwrite I/O size as the
+				 * max segment size... this is the previous behavior
+				 */
+				*maxsegsize = mp->mnt_maxwritecnt;
+			break;
+		default:
+			break;
+		}
+		if (*maxsegsize == 0)
+			*maxsegsize = MAXPHYS;
+	}
+}
+
+
+#include <sys/disk.h>
+
+
+/*
+ * vfs_init_io_attributes:
+ *	query the device underlying a mount (via DKIOC* ioctls) and record
+ *	its I/O transfer limits: byte/segment limits go into the extended
+ *	x_constraints info hung off mnt_xinfo_ptr (allocated here if
+ *	needed) and into the mount structure itself.  Also flags the mount
+ *	with MNTK_ROOTDEV if it lives on the same device unit as the root
+ *	filesystem and with MNTK_VIRTUALDEV for virtual devices.
+ *
+ *	devvp:	block device vnode backing the mount
+ *	mp:	the mount being initialized
+ *
+ *	Returns 0 on success, or the error from a failing limits ioctl.
+ */
+int
+vfs_init_io_attributes(struct vnode *devvp, struct mount *mp)
+{
+	int error;
+	off_t readblockcnt;
+	off_t writeblockcnt;
+	off_t readmaxcnt;
+	off_t writemaxcnt;
+	off_t readsegcnt;
+	off_t writesegcnt;
+	off_t readsegsize;
+	off_t writesegsize;
+	u_long blksize;
+
+	u_int64_t temp;
+
+	struct proc *p = current_proc();
+	struct ucred *cred = p->p_ucred;
+
+	int isvirtual = 0;
+	/*
+	 * determine if this mount point exists on the same device as the root
+	 * partition... if so, then it comes under the hard throttle control
+	 */
+	int thisunit = -1;
+	static int rootunit = -1;	/* root device unit, cached across calls */
+	extern struct vnode *rootvp;
+
+	if (rootunit == -1) {
+		if (VOP_IOCTL(rootvp, DKIOCGETBSDUNIT, (caddr_t)&rootunit, 0, cred, p))
+			rootunit = -1;
+		else if (rootvp == devvp)
+			mp->mnt_kern_flag |= MNTK_ROOTDEV;
+	}
+	if (devvp != rootvp && rootunit != -1) {
+		if (VOP_IOCTL(devvp, DKIOCGETBSDUNIT, (caddr_t)&thisunit, 0, cred, p) == 0) {
+			if (thisunit == rootunit)
+				mp->mnt_kern_flag |= MNTK_ROOTDEV;
+		}
+	}
+	/* a failure here is benign; the device simply isn't treated as virtual */
+	if (VOP_IOCTL(devvp, DKIOCGETISVIRTUAL, (caddr_t)&isvirtual, 0, cred, p) == 0) {
+		if (isvirtual)
+			mp->mnt_kern_flag |= MNTK_VIRTUALDEV;
+	}
+
+	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD,
+				(caddr_t)&readblockcnt, 0, cred, p)))
+		return (error);
+
+	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE,
+				(caddr_t)&writeblockcnt, 0, cred, p)))
+		return (error);
+
+	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD,
+				(caddr_t)&readmaxcnt, 0, cred, p)))
+		return (error);
+
+	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE,
+				(caddr_t)&writemaxcnt, 0, cred, p)))
+		return (error);
+
+	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD,
+				(caddr_t)&readsegcnt, 0, cred, p)))
+		return (error);
+
+	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE,
+				(caddr_t)&writesegcnt, 0, cred, p)))
+		return (error);
+
+	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTREAD,
+				(caddr_t)&readsegsize, 0, cred, p)))
+		return (error);
+
+	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTWRITE,
+				(caddr_t)&writesegsize, 0, cred, p)))
+		return (error);
+
+	if ((error = VOP_IOCTL(devvp, DKIOCGETBLOCKSIZE,
+				(caddr_t)&blksize, 0, cred, p)))
+		return (error);
+
+
+	if ( !(mp->mnt_kern_flag & MNTK_IO_XINFO)) {
+		MALLOC(mp->mnt_xinfo_ptr, void *, sizeof(struct x_constraints), M_TEMP, M_WAITOK);
+		mp->mnt_kern_flag |= MNTK_IO_XINFO;
+	}
+
+	/*
+	 * max read transfer: prefer the byte-count limit, then the
+	 * block-count limit scaled by the device block size, then MAXPHYS;
+	 * clamp to 32 bits in all cases.
+	 */
+	if (readmaxcnt)
+		temp = (readmaxcnt > UINT32_MAX) ? UINT32_MAX : readmaxcnt;
+	else {
+		if (readblockcnt) {
+			temp = readblockcnt * blksize;
+			temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
+		} else
+			temp = MAXPHYS;
+	}
+	/*
+	 * NOTE(review): the read limit is stored only in the extended info,
+	 * while the write path below also updates mp->mnt_maxwritecnt --
+	 * presumably mnt_maxreadcnt is maintained elsewhere; confirm
+	 * against the consumers in vfs_io_attributes().
+	 */
+	((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt = (u_int32_t)temp;
+
+	/* max write transfer: same preference order as the read limit */
+	if (writemaxcnt)
+		temp = (writemaxcnt > UINT32_MAX) ? UINT32_MAX : writemaxcnt;
+	else {
+		if (writeblockcnt) {
+			temp = writeblockcnt * blksize;
+			temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
+		} else
+			temp = MAXPHYS;
+	}
+	mp->mnt_maxwritecnt = (u_int32_t)temp;
+
+	/* segment counts are 16-bit fields in the mount structure */
+	if (readsegcnt) {
+		temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt;
+		mp->mnt_segreadcnt = (u_int16_t)temp;
+	}
+	if (writesegcnt) {
+		temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt;
+		mp->mnt_segwritecnt = (u_int16_t)temp;
+	}
+	if (readsegsize)
+		temp = (readsegsize > UINT32_MAX) ? UINT32_MAX : readsegsize;
+	else
+		/*
+		 * no per-segment byte limit reported: fall back to the max
+		 * read transfer size computed above.  That value lives in
+		 * x_maxreadcnt -- this function never sets
+		 * mp->mnt_maxreadcnt, which the previous code read here.
+		 */
+		temp = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt;
+	((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize = (u_int32_t)temp;
+
+	if (writesegsize)
+		temp = (writesegsize > UINT32_MAX) ? UINT32_MAX : writesegsize;
+	else
+		temp = mp->mnt_maxwritecnt;
+	((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize = (u_int32_t)temp;
+
+	return (error);
+}
+
+/* list of knotes interested in filesystem (EVFILT_FS) events */
+static struct klist fs_klist;
+
+/*
+ * vfs_event_init:
+ *	initialize the filesystem event knote list; must run before any
+ *	EVFILT_FS knotes are attached.
+ */
+void
+vfs_event_init(void)
+{
+
+	klist_init(&fs_klist);
+}
+
+/*
+ * vfs_event_signal:
+ *	post a filesystem event to all attached EVFILT_FS knotes.  Only
+ *	'event' is delivered (as the knote hint); the 'fsid' and 'data'
+ *	arguments are currently unused.
+ */
+void
+vfs_event_signal(fsid_t *fsid, u_int32_t event, intptr_t data)
+{
+
+	KNOTE(&fs_klist, event);
+}
+
+/*
+ * return the number of mounted filesystems.
+ */
+static int
+sysctl_vfs_getvfscnt(void)
+{
+	struct mount *nmp;
+	int count = 0;
+
+	/* walk the global mount list under the mountlist lock */
+	simple_lock(&mountlist_slock);
+	CIRCLEQ_FOREACH(nmp, &mountlist, mnt_list)
+		count++;
+	simple_unlock(&mountlist_slock);
+
+	return (count);
+}
+
+/*
+ * fill in the array of fsid_t's up to a max of 'count', the actual
+ * number filled in will be set in '*actual'. If there are more fsid_t's
+ * than room in fsidlst then ENOMEM will be returned and '*actual' will
+ * have the actual count.
+ * having *actual filled out even in the error case is depended upon.
+ */
+static int
+sysctl_vfs_getvfslist(fsid_t *fsidlst, int count, int *actual)
+{
+	struct mount *nmp;
+	int found = 0;
+
+	simple_lock(&mountlist_slock);
+	CIRCLEQ_FOREACH(nmp, &mountlist, mnt_list) {
+		found++;
+		/* copy out only the ids that still fit in the caller's array */
+		if (found <= count)
+			fsidlst[found - 1] = nmp->mnt_stat.f_fsid;
+	}
+	simple_unlock(&mountlist_slock);
+
+	/* callers rely on *actual being filled out even on ENOMEM */
+	*actual = found;
+	return (found <= count ? 0 : ENOMEM);
+}
+
+/*
+ * sysctl_vfs_vfslist:
+ *	read-only sysctl handler returning the fsid_t of every mounted
+ *	filesystem.  A request with a NULL old pointer just reports the
+ *	space required; otherwise the list is copied out, retrying from
+ *	scratch if a new mount appears while the buffer is allocated.
+ */
+static int
+sysctl_vfs_vfslist SYSCTL_HANDLER_ARGS
+{
+	int actual, error;
+	size_t space;
+	fsid_t *fsidlst;
+
+	/* This is a readonly node. */
+	if (req->newptr != NULL)
+		return (EPERM);
+
+	/* they are querying us so just return the space required. */
+	if (req->oldptr == NULL) {
+		req->oldidx = sysctl_vfs_getvfscnt() * sizeof(fsid_t);
+		return 0;
+	}
+again:
+	/*
+	 * Retrieve an accurate count of the amount of space required to copy
+	 * out all the fsids in the system.
+	 */
+	space = req->oldlen;
+	req->oldlen = sysctl_vfs_getvfscnt() * sizeof(fsid_t);
+
+	/* they didn't give us enough space. */
+	if (space < req->oldlen)
+		return (ENOMEM);
+
+	/* M_WAITOK may sleep, which is why the count is re-checked below */
+	MALLOC(fsidlst, fsid_t *, req->oldlen, M_TEMP, M_WAITOK);
+	error = sysctl_vfs_getvfslist(fsidlst, req->oldlen / sizeof(fsid_t),
+	    &actual);
+	/*
+	 * If we get back ENOMEM, then another mount has been added while we
+	 * slept in malloc above. If this is the case then try again.
+	 */
+	if (error == ENOMEM) {
+		FREE(fsidlst, M_TEMP);
+		req->oldlen = space;
+		goto again;
+	}
+	if (error == 0) {
+		error = SYSCTL_OUT(req, fsidlst, actual * sizeof(fsid_t));
+	}
+	FREE(fsidlst, M_TEMP);
+	return (error);
+}
+
+/*
+ * Do a sysctl by fsid.
+ */
+/*
+ * sysctl_vfs_ctlbyfsid:
+ *	dispatch a sysctl operation (unmount, statfs, or a filesystem-
+ *	specific control) to the mount identified by the fsid in the
+ *	copied-in struct vfsidctl.  The filesystem's own vfs_sysctl gets
+ *	first crack; EOPNOTSUPP falls through to the generic handlers.
+ */
+static int
+sysctl_vfs_ctlbyfsid SYSCTL_HANDLER_ARGS
+{
+	struct vfsidctl vc;
+	struct mount *mp;
+	struct statfs *sp;
+	struct proc *p;
+	int *name;
+	int error, flags, namelen;
+
+	name = arg1;
+	namelen = arg2;
+	p = req->p;
+
+	error = SYSCTL_IN(req, &vc, sizeof(vc));
+	if (error)
+		return (error);
+	/* only version-1 vfsidctl requests are understood */
+	if (vc.vc_vers != VFS_CTL_VERS1)
+		return (EINVAL);
+	mp = vfs_getvfs(&vc.vc_fsid);
+	if (mp == NULL)
+		return (ENOENT);
+	/* reset so that the fs specific code can fetch it. */
+	req->newidx = 0;
+	/*
+	 * Note if this is a VFS_CTL then we pass the actual sysctl req
+	 * in for "oldp" so that the lower layer can DTRT and use the
+	 * SYSCTL_IN/OUT routines.
+	 */
+	if (mp->mnt_op->vfs_sysctl != NULL) {
+		error = mp->mnt_op->vfs_sysctl(name, namelen,
+		    req, NULL, NULL, 0, req->p);
+		if (error != EOPNOTSUPP)
+			return (error);
+	}
+	switch (name[0]) {
+	case VFS_CTL_UMOUNT:
+		/* map the vfsidctl's embedded pointers back into the req */
+		VCTLTOREQ(&vc, req);
+		error = SYSCTL_IN(req, &flags, sizeof(flags));
+		if (error)
+			break;
+		/* safedounmount performs the permission checks */
+		error = safedounmount(mp, flags, p);
+		break;
+	case VFS_CTL_STATFS:
+		VCTLTOREQ(&vc, req);
+		error = SYSCTL_IN(req, &flags, sizeof(flags));
+		if (error)
+			break;
+		sp = &mp->mnt_stat;
+		/* refresh the cached statfs unless the caller asked for NOWAIT */
+		if (((flags & MNT_NOWAIT) == 0 || (flags & MNT_WAIT)) &&
+		    (error = VFS_STATFS(mp, sp, p)))
+			return (error);
+		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+		error = SYSCTL_OUT(req, sp, sizeof(*sp));
+		break;
+	default:
+		return (EOPNOTSUPP);
+	}
+	return (error);
+}
+
+static int filt_fsattach(struct knote *kn);
+static void filt_fsdetach(struct knote *kn);
+static int filt_fsevent(struct knote *kn, long hint);
+
+/* EVFILT_FS filter operations; first field 0: not file-descriptor based */
+struct filterops fs_filtops =
+	{ 0, filt_fsattach, filt_fsdetach, filt_fsevent };
+
+/*
+ * filt_fsattach:
+ *	attach a knote to the global filesystem event list.  EV_CLEAR is
+ *	forced so the accumulated event bits reset once delivered.
+ */
+static int
+filt_fsattach(struct knote *kn)
+{
+
+	kn->kn_flags |= EV_CLEAR;
+	KNOTE_ATTACH(&fs_klist, kn);
+	return (0);
+}
+
+/* filt_fsdetach: remove the knote from the filesystem event list */
+static void
+filt_fsdetach(struct knote *kn)
+{
+
+	KNOTE_DETACH(&fs_klist, kn);
+}
+
+/*
+ * filt_fsevent:
+ *	filter routine run via KNOTE(); OR the hint (the event posted by
+ *	vfs_event_signal) into kn_fflags and report the knote active
+ *	while any bits remain set.
+ */
+static int
+filt_fsevent(struct knote *kn, long hint)
+{
+
+	kn->kn_fflags |= hint;
+	return (kn->kn_fflags != 0);
+}
+
+/*
+ * sysctl_vfs_noremotehang:
+ *	get or set the P_NOREMOTEHANG flag on a process.  The new value
+ *	is a pid: a positive pid sets the flag on that process, a
+ *	negative pid clears it on process -pid.  If an old pointer is
+ *	supplied, the current state of the flag is returned instead and
+ *	no change is made.
+ */
+static int
+sysctl_vfs_noremotehang SYSCTL_HANDLER_ARGS
+{
+	int out, error;
+	pid_t pid;
+	struct proc *p;
+
+	/* We need a pid. */
+	if (req->newptr == NULL)
+		return (EINVAL);
+
+	error = SYSCTL_IN(req, &pid, sizeof(pid));
+	if (error)
+		return (error);
+
+	/* a negative pid means "clear"; look up the process either way */
+	p = pfind(pid < 0 ? -pid : pid);
+	if (p == NULL)
+		return (ESRCH);
+
+	/*
+	 * Fetching the value is ok, but we only fetch if the old
+	 * pointer is given.
+	 */
+	if (req->oldptr != NULL) {
+		out = (p->p_flag & P_NOREMOTEHANG) != 0;
+		error = SYSCTL_OUT(req, &out, sizeof(out));
+		return (error);
+	}
+
+	/* cansignal offers us enough security. */
+	if (p != req->p && suser(req->p->p_ucred, &req->p->p_acflag) != 0)
+		return (EPERM);
+
+	if (pid < 0)
+		p->p_flag &= ~P_NOREMOTEHANG;
+	else
+		p->p_flag |= P_NOREMOTEHANG;
+
+	return (0);
+}
+/* the vfs.generic. branch. */
+SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RW, 0, "vfs generic hinge");
+/* retrieve a list of mounted filesystem fsid_t */
+SYSCTL_PROC(_vfs_generic, OID_AUTO, vfsidlist, CTLFLAG_RD,
+    0, 0, sysctl_vfs_vfslist, "S,fsid", "List of mounted filesystem ids");
+/* perform operations on a filesystem via fsid_t */
+SYSCTL_NODE(_vfs_generic, OID_AUTO, ctlbyfsid, CTLFLAG_RW,
+    sysctl_vfs_ctlbyfsid, "ctlbyfsid");
+/* get/set the P_NOREMOTEHANG flag on a process by pid */
+SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW,
+    0, 0, sysctl_vfs_noremotehang, "I", "noremotehang");
+