]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/kern/kern_descrip.c
xnu-517.3.7.tar.gz
[apple/xnu.git] / bsd / kern / kern_descrip.c
index cf9ba2d06d3936d8a7ee2d4f35b97122fe13add2..1f6089b4acb0878bc312b57ba39ebe28df2048af 100644 (file)
@@ -1,21 +1,24 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
  * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
  * 
  * @APPLE_LICENSE_HEADER_END@
  */
  * SUCH DAMAGE.
  *
  *     @(#)kern_descrip.c      8.8 (Berkeley) 2/14/95
- *
- *     History:
- *             CHW     8/5/98    Added F_SETSIZE command to truncate without
- *                                     zero filling space 
- *             CHW     7/6/98    Updated Preallocate command to take a structure
- *                               and return output.
- *             CHW     6/25/98   Fixed a bug in the lock call in fcntl 
- *                               Preallocate command
  */
 
 #include <sys/param.h>
@@ -84,6 +79,8 @@
 #include <sys/syslog.h>
 #include <sys/unistd.h>
 #include <sys/resourcevar.h>
+#include <sys/aio_kern.h>
+#include <sys/kern_audit.h>
 
 #include <sys/mount.h>
 
@@ -93,6 +90,8 @@
 struct filelist filehead;      /* head of list of open files */
 int nfiles;                    /* actual number of open files */
 
+static int frele_internal(struct file *);
+
 /*
  * System calls on descriptors.
  */
@@ -103,7 +102,6 @@ getdtablesize(p, uap, retval)
        void *uap;
        register_t *retval;
 {
-
        *retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
        return (0);
 }
@@ -115,7 +113,6 @@ ogetdtablesize(p, uap, retval)
        void *uap;
        register_t *retval;
 {
-
        *retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, NOFILE);
        return (0);
 }
@@ -200,8 +197,7 @@ dup2(p, uap, retval)
                        _fdrelse(fdp, i);
                        goto closeit;
                }
-       }
-       else {
+       } else {
                struct file **fpp;
                char flags;
 closeit:
@@ -214,7 +210,8 @@ closeit:
                if (*(fpp = &fdp->fd_ofiles[new])) {
                        struct file *fp = *fpp;
 
-                       *fpp = NULL; (void) closef(fp, p);
+                       *fpp = NULL;
+                       (void) closef(fp, p);
                }
        }
        return (finishdup(fdp, old, new, retval));
@@ -242,21 +239,24 @@ fcntl(p, uap, retval)
        struct vnode *vp, *devvp;
        int i, tmp, error, error2, flg = F_POSIX;
        struct flock fl;
-        fstore_t alloc_struct;    /* structure for allocate command */
+       fstore_t alloc_struct;    /* structure for allocate command */
        u_int32_t alloc_flags = 0;
        off_t offset;             /* used for F_SETSIZE */
        int newmin;
        struct radvisory ra_struct;
        fbootstraptransfer_t fbt_struct; /* for F_READBOOTSTRAP and F_WRITEBOOTSTRAP */
-        struct log2phys l2p_struct;    /* structure for allocate command */
+       struct log2phys l2p_struct;    /* structure for allocate command */
        daddr_t lbn, bn;
        int devBlockSize = 0;
 
+       AUDIT_ARG(fd, uap->fd);
+       AUDIT_ARG(cmd, uap->cmd);
        if ((u_int)fd >= fdp->fd_nfiles ||
                        (fp = fdp->fd_ofiles[fd]) == NULL ||
                        (fdp->fd_ofileflags[fd] & UF_RESERVED))
                return (EBADF);
        pop = &fdp->fd_ofileflags[fd];
+
        switch (uap->cmd) {
 
        case F_DUPFD:
@@ -285,16 +285,16 @@ fcntl(p, uap, retval)
                fp->f_flag &= ~FCNTLFLAGS;
                fp->f_flag |= FFLAGS((long)uap->arg) & FCNTLFLAGS;
                tmp = fp->f_flag & FNONBLOCK;
-               error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
+               error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
                if (error)
                        return (error);
                tmp = fp->f_flag & FASYNC;
-               error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
+               error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
                if (!error)
                        return (0);
                fp->f_flag &= ~FNONBLOCK;
                tmp = 0;
-               (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
+               (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
                return (error);
 
        case F_GETOWN:
@@ -302,8 +302,7 @@ fcntl(p, uap, retval)
                        *retval = ((struct socket *)fp->f_data)->so_pgid;
                        return (0);
                }
-               error = (*fp->f_ops->fo_ioctl)
-                       (fp, (int)TIOCGPGRP, (caddr_t)retval, p);
+               error = fo_ioctl(fp, (int)TIOCGPGRP, (caddr_t)retval, p);
                *retval = -*retval;
                return (error);
 
@@ -314,15 +313,14 @@ fcntl(p, uap, retval)
                        return (0);
                }
                if ((long)uap->arg <= 0) {
-                       uap->arg = (void *)(-(long)(uap->arg));
+                       uap->arg = (int)(-(long)(uap->arg));
                } else {
                        struct proc *p1 = pfind((long)uap->arg);
                        if (p1 == 0)
                                return (ESRCH);
-                       uap->arg = (void *)(long)p1->p_pgrp->pg_id;
+                       uap->arg = (int)p1->p_pgrp->pg_id;
                }
-               return ((*fp->f_ops->fo_ioctl)
-                       (fp, (int)TIOCSPGRP, (caddr_t)&uap->arg, p));
+               return (fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&uap->arg, p));
 
        case F_SETLKW:
                flg |= F_WAIT;
@@ -332,6 +330,7 @@ fcntl(p, uap, retval)
                if (fp->f_type != DTYPE_VNODE)
                        return (EBADF);
                vp = (struct vnode *)fp->f_data;
+               AUDIT_ARG(vnpath, vp, ARG_VNODE1);
                /* Copy in the lock structure */
                error = copyin((caddr_t)uap->arg, (caddr_t)&fl,
                    sizeof (fl));
@@ -365,6 +364,7 @@ fcntl(p, uap, retval)
                if (fp->f_type != DTYPE_VNODE)
                        return (EBADF);
                vp = (struct vnode *)fp->f_data;
+               AUDIT_ARG(vnpath, vp, ARG_VNODE1);
                /* Copy in the lock structure */
                error = copyin((caddr_t)uap->arg, (caddr_t)&fl,
                    sizeof (fl));
@@ -377,192 +377,204 @@ fcntl(p, uap, retval)
                return (copyout((caddr_t)&fl, (caddr_t)uap->arg,
                    sizeof (fl)));
 
-        case F_PREALLOCATE:
-               
-               /* Copy in the structure */
+       case F_PREALLOCATE:
+               if (fp->f_type != DTYPE_VNODE)
+                       return (EBADF);
+
+               /* make sure that we have write permission */
+               if ((fp->f_flag & FWRITE) == 0)
+                       return (EBADF);
 
                error = copyin((caddr_t)uap->arg, (caddr_t)&alloc_struct,
                    sizeof (alloc_struct));
-
                if (error)
                        return (error);
 
-               /* now set the space allocated to 0 and pass it out in
-                  case we get a parameter checking error */
-               
+               /* now set the space allocated to 0 */
                alloc_struct.fst_bytesalloc = 0;
                
-               error = copyout((caddr_t)&alloc_struct, (caddr_t)uap->arg,
-                   sizeof (alloc_struct));
-
-                       if (error)
-                               return(error);
-
-               /* First make sure that we have write permission */
-
-               if ((fp->f_flag & FWRITE) == 0)
-                       return (EBADF);
-
-
-               /* Do some simple parameter checking */
-
+               /*
+                * Do some simple parameter checking
+                */
 
                /* set up the flags */
 
                alloc_flags |= PREALLOCATE;
                
-               if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
+               if (alloc_struct.fst_flags & F_ALLOCATECONTIG)
                        alloc_flags |= ALLOCATECONTIG;
-               }
 
-                if (alloc_struct.fst_flags & F_ALLOCATEALL) {
-                       alloc_flags |= ALLOCATEALL;
-                       }
+               if (alloc_struct.fst_flags & F_ALLOCATEALL)
+                       alloc_flags |= ALLOCATEALL;
 
-               /* Do any position mode specific stuff.  The only   */
-               /* position mode  supported now is PEOFPOSMODE      */
+               /*
+                * Do any position mode specific stuff.  The only
+                * position mode  supported now is PEOFPOSMODE
+                */
 
                switch (alloc_struct.fst_posmode) {
        
                case F_PEOFPOSMODE:
-
-                       if ((alloc_struct.fst_offset != 0) ||
-                           (alloc_struct.fst_length  < 0))
-                       return (EINVAL);
+                       if (alloc_struct.fst_offset != 0)
+                               return (EINVAL);
 
                        alloc_flags |= ALLOCATEFROMPEOF;
                        break;
 
-               default:
+               case F_VOLPOSMODE:
+                       if (alloc_struct.fst_offset <= 0)
+                               return (EINVAL);
 
-                       return(EINVAL);
+                       alloc_flags |= ALLOCATEFROMVOL;
+                       break;
 
+               default:
+                       return(EINVAL);
                }
 
+               vp = (struct vnode *)fp->f_data;
 
-               /* Now lock the vnode and call allocate to get the space */
-
-               vp = (struct vnode *)fp->f_data;
-
-               VOP_LOCK(vp,LK_EXCLUSIVE,p);
+               /* lock the vnode and call allocate to get the space */
+               error = vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, p);
+               if (error)
+                       return (error);
                error = VOP_ALLOCATE(vp,alloc_struct.fst_length,alloc_flags,
-                                    &alloc_struct.fst_bytesalloc,fp->f_cred,p);
-               VOP_UNLOCK(vp,0,p);
+                                    &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
+                                    fp->f_cred, p);
+               VOP_UNLOCK(vp, 0, p);
 
-               if (error2 = (copyout((caddr_t)&alloc_struct, (caddr_t)uap->arg,
-                   sizeof (alloc_struct)))) {
-                       if (error) {
+               if (error2 = copyout((caddr_t)&alloc_struct,
+                                               (caddr_t)uap->arg,
+                                               sizeof (alloc_struct))) {
+                       if (error)
                                return(error);
-                       } else {
+                       else
                                return(error2);
-                       }
                }
-
                return(error);
                
-        case F_SETSIZE:
+       case F_SETSIZE:
+               if (fp->f_type != DTYPE_VNODE)
+                       return (EBADF);
                
-               /* Copy in the structure */
-
                error = copyin((caddr_t)uap->arg, (caddr_t)&offset,
-                   sizeof (off_t));
-
+                                       sizeof (off_t));
                if (error)
                        return (error);
 
+               /*
+                * Make sure that we are root.  Growing a file
+                * without zero filling the data is a security hole;
+                * root would have access anyway so we'll allow it
+                */
 
-               /* First make sure that we are root.  Growing a file */
-               /* without zero filling the data is a security hole  */
-               /* root would have access anyway so we'll allow it   */
-
-               if (!is_suser()) {
+               if (!is_suser())
                        return (EACCES);
-                       }
-
-               /* Now lock the vnode and call allocate to get the space */
 
-               vp = (struct vnode *)fp->f_data;
+               vp = (struct vnode *)fp->f_data;
 
-               VOP_LOCK(vp,LK_EXCLUSIVE,p);
+               /* lock the vnode and call truncate to set the size */
+               error = vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, p);
+               if (error)
+                       return (error);
                error = VOP_TRUNCATE(vp,offset,IO_NOZEROFILL,fp->f_cred,p);
                VOP_UNLOCK(vp,0,p);
-
                return(error);
-               
-        case F_RDAHEAD:
-               vp = (struct vnode *)fp->f_data;
-         
+
+       case F_RDAHEAD:
+               if (fp->f_type != DTYPE_VNODE)
+                       return (EBADF);
+               vp = (struct vnode *)fp->f_data;
+
                simple_lock(&vp->v_interlock);
                if (uap->arg)
-                     vp->v_flag &= ~VRAOFF;
+                       vp->v_flag &= ~VRAOFF;
                else
-                     vp->v_flag |= VRAOFF;
+                       vp->v_flag |= VRAOFF;
                simple_unlock(&vp->v_interlock);
-
                return (0);
 
-        case F_NOCACHE:
-               vp = (struct vnode *)fp->f_data;
-         
+       case F_NOCACHE:
+               if (fp->f_type != DTYPE_VNODE)
+                       return (EBADF);
+               vp = (struct vnode *)fp->f_data;
+
                simple_lock(&vp->v_interlock);
                if (uap->arg)
-                       vp->v_flag |= VNOCACHE_DATA;
+                       vp->v_flag |= VNOCACHE_DATA;
                else
-                       vp->v_flag &= ~VNOCACHE_DATA;
+                       vp->v_flag &= ~VNOCACHE_DATA;
                simple_unlock(&vp->v_interlock);
-
                return (0);
 
        case F_RDADVISE:
-               vp = (struct vnode *)fp->f_data;
+               if (fp->f_type != DTYPE_VNODE)
+                       return (EBADF);
+               vp = (struct vnode *)fp->f_data;
+
+               if (error = copyin((caddr_t)uap->arg,
+                                       (caddr_t)&ra_struct, sizeof (ra_struct)))
+                       return(error);
+               return (VOP_IOCTL(vp, 1, (caddr_t)&ra_struct, 0, fp->f_cred, p));
+
+       case F_CHKCLEAN:
+               /*
+                * used by regression test to determine if 
+                * all the dirty pages (via write) have been cleaned
+                * after a call to 'fsync'.
+                */
+               if (fp->f_type != DTYPE_VNODE)
+                       return (EBADF);
+               vp = (struct vnode *)fp->f_data;
+
+               return (VOP_IOCTL(vp, 5, 0, 0, fp->f_cred, p));
 
-               if (error = copyin((caddr_t)uap->arg, (caddr_t)&ra_struct, sizeof (ra_struct)))
-                       return(error);
-               return (VOP_IOCTL(vp, 1, &ra_struct, 0, fp->f_cred, p));
-               
        case F_READBOOTSTRAP:
        case F_WRITEBOOTSTRAP:
-               
-               /* Copy in the structure */
+               if (fp->f_type != DTYPE_VNODE)
+                       return (EBADF);
 
                error = copyin((caddr_t)uap->arg, (caddr_t)&fbt_struct,
-                   sizeof (fbt_struct));
-
+                               sizeof (fbt_struct));
                if (error)
                        return (error);
 
-
                if (uap->cmd == F_WRITEBOOTSTRAP) {
-                 /* First make sure that we are root.  Updating the */
-                 /* bootstrap on a disk could be a security hole */
-
-                 if (!is_suser()) {
-                   return (EACCES);
-                 }
-               };
-
-               /* Now lock the vnode and call VOP_IOCTL to handle the I/O: */
-
-               vp = (struct vnode *)fp->f_data;
-               if (vp->v_tag != VT_HFS) {
-                 error = EINVAL;
-               } else {
-                 VOP_LOCK(vp,LK_EXCLUSIVE,p);
-                 error = VOP_IOCTL(vp, (uap->cmd == F_WRITEBOOTSTRAP) ? 3 : 2, &fbt_struct, 0, fp->f_cred, p);
-                 VOP_UNLOCK(vp,0,p);
-               };
+                 /*
+                  * Make sure that we are root.  Updating the
+                  * bootstrap on a disk could be a security hole
+                  */
+                       if (!is_suser())
+                               return (EACCES);
+               }
 
+               vp = (struct vnode *)fp->f_data;
+               if (vp->v_tag != VT_HFS)        /* XXX */
+                       error = EINVAL;
+               else {
+                       /* lock the vnode and call VOP_IOCTL to handle the I/O */
+                       error = vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, p);
+                       if (error)
+                               return (error);
+                       error = VOP_IOCTL(vp, (uap->cmd == F_WRITEBOOTSTRAP) ? 3 : 2,
+                                       (caddr_t)&fbt_struct, 0, fp->f_cred, p);
+                       VOP_UNLOCK(vp,0,p);
+               }
                return(error);
-               
-        case F_LOG2PHYS:
+
+       case F_LOG2PHYS:
                if (fp->f_type != DTYPE_VNODE)
                        return (EBADF);
                vp = (struct vnode *)fp->f_data;
-               VOP_LOCK(vp, LK_EXCLUSIVE, p);
-               if (VOP_OFFTOBLK(vp, fp->f_offset, &lbn))
-                       panic("fcntl LOG2PHYS OFFTOBLK");
-               if (VOP_BLKTOOFF(vp, lbn, &offset))
-                       panic("fcntl LOG2PHYS BLKTOOFF1");
+               error = vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, p);
+               if (error)
+                       return (error);
+               error = VOP_OFFTOBLK(vp, fp->f_offset, &lbn);
+               if (error)
+                       return (error);
+               error = VOP_BLKTOOFF(vp, lbn, &offset);
+               if (error)
+                       return (error);
                error = VOP_BMAP(vp, lbn, &devvp, &bn, 0);
                VOP_DEVBLOCKSIZE(devvp, &devBlockSize);
                VOP_UNLOCK(vp, 0, p);
@@ -577,7 +589,32 @@ fcntl(p, uap, retval)
                }
                return (error);
 
+       case F_GETPATH: {
+               char *pathbuf;
+               int len;
+               extern int vn_getpath(struct vnode *vp, char *pathbuf, int *len);
+
+               if (fp->f_type != DTYPE_VNODE)
+                       return (EBADF);
+               vp = (struct vnode *)fp->f_data;
+
+               len = MAXPATHLEN;
+               MALLOC(pathbuf, char *, len, M_TEMP, M_WAITOK);
+               error = vn_getpath(vp, pathbuf, &len);
+               if (error == 0)
+                       error = copyout((caddr_t)pathbuf, (caddr_t)uap->arg, len);
+               FREE(pathbuf, M_TEMP);
+               return error;
+       }
 
+       case F_FULLFSYNC: {
+               if (fp->f_type != DTYPE_VNODE)
+                       return (EBADF);
+               vp = (struct vnode *)fp->f_data;
+
+               return (VOP_IOCTL(vp, 6, (caddr_t)NULL, 0, fp->f_cred, p));
+       }
+           
        default:
                return (EINVAL);
        }
@@ -630,6 +667,16 @@ close(p, uap, retval)
                        (fp = fdp->fd_ofiles[fd]) == NULL ||
                        (fdp->fd_ofileflags[fd] & UF_RESERVED))
                return (EBADF);
+
+       /* Keep people from using the filedesc while we are closing it */
+       fdp->fd_ofileflags[fd] |= UF_RESERVED;
+               
+       /* cancel all async IO requests that can be cancelled. */
+       _aio_close( p, fd );
+
+        if (fd < fdp->fd_knlistsize)
+               knote_fdclose(p, fd);
+
        _fdrelse(fdp, fd);
        return (closef(fp, p));
 }
@@ -654,6 +701,7 @@ fstat(p, uap, retval)
        struct stat ub;
        int error;
 
+       AUDIT_ARG(fd, uap->fd);
        if ((u_int)fd >= fdp->fd_nfiles ||
                        (fp = fdp->fd_ofiles[fd]) == NULL ||
                        (fdp->fd_ofileflags[fd] & UF_RESERVED))
@@ -662,6 +710,9 @@ fstat(p, uap, retval)
 
        case DTYPE_VNODE:
                error = vn_stat((struct vnode *)fp->f_data, &ub, p);
+               if (error == 0) {
+                       AUDIT_ARG(vnpath, (struct vnode *)fp->f_data, ARG_VNODE1);
+               }
                break;
 
        case DTYPE_SOCKET:
@@ -671,6 +722,11 @@ fstat(p, uap, retval)
        case DTYPE_PSXSHM:
                error = pshm_stat((void *)fp->f_data, &ub);
                break;
+
+       case DTYPE_KQUEUE:
+         error = kqueue_stat(fp, &ub, p);
+         break;
+
        default:
                panic("fstat");
                /*NOTREACHED*/
@@ -746,6 +802,7 @@ fpathconf(p, uap, retval)
        struct file *fp;
        struct vnode *vp;
 
+       AUDIT_ARG(fd, uap->fd);
        if ((u_int)fd >= fdp->fd_nfiles ||
                        (fp = fdp->fd_ofiles[fd]) == NULL ||
                        (fdp->fd_ofileflags[fd] & UF_RESERVED))
@@ -760,6 +817,8 @@ fpathconf(p, uap, retval)
 
        case DTYPE_VNODE:
                vp = (struct vnode *)fp->f_data;
+               AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
                return (VOP_PATHCONF(vp, uap->name, retval));
 
        default:
@@ -933,11 +992,6 @@ falloc(p, resultfp, resultfd)
        nfiles++;
        MALLOC_ZONE(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
        bzero(fp, sizeof(struct file));
-       if (fq = p->p_fd->fd_ofiles[0]) {
-               LIST_INSERT_AFTER(fq, fp, f_list);
-       } else {
-               LIST_INSERT_HEAD(&filehead, fp, f_list);
-       }
        p->p_fd->fd_ofiles[i] = fp;
        fp->f_count = 1;
        fp->f_cred = p->p_ucred;
@@ -946,6 +1000,11 @@ falloc(p, resultfp, resultfd)
                *resultfp = fp;
        if (resultfd)
                *resultfd = i;
+       if (fq = p->p_fd->fd_ofiles[0]) {
+               LIST_INSERT_AFTER(fq, fp, f_list);
+       } else {
+               LIST_INSERT_HEAD(&filehead, fp, f_list);
+       }
        return (0);
 }
 
@@ -965,10 +1024,11 @@ ffree(fp)
                fp->f_cred = NOCRED;
                crfree(cred);
        }
-#if 1 || DIAGNOSTIC
-       fp->f_count = 0;
-#endif
+
        nfiles--;
+       memset(fp, 0xff, sizeof *fp);
+       fp->f_count = (short)0xffff;
+
        FREE_ZONE(fp, sizeof *fp, M_FILE);
 }
 
@@ -985,6 +1045,9 @@ fdexec(p)
                if ((*flags & (UF_RESERVED|UF_EXCLOSE)) == UF_EXCLOSE) {
                        register struct file *fp = *fpp;
 
+                        if (i < fdp->fd_knlistsize)
+                                knote_fdclose(p, i);
+
                        *fpp = NULL; *flags = 0;
                        if (i == fdp->fd_lastfile && i > 0)
                                fdp->fd_lastfile--;
@@ -1046,6 +1109,26 @@ fdcopy(p)
                (void) memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags,
                                        i * sizeof *fdp->fd_ofileflags);
 
+               /*
+                * kq descriptors cannot be copied.
+                */
+               if (newfdp->fd_knlistsize != -1) {
+                       fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
+                       for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
+                               if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
+                                       *fpp = NULL;
+                                       if (i < newfdp->fd_freefile)
+                                               newfdp->fd_freefile = i;
+                               }
+                               if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
+                                       newfdp->fd_lastfile--;
+                       }
+                       newfdp->fd_knlist = NULL;
+                       newfdp->fd_knlistsize = -1;
+                       newfdp->fd_knhash = NULL;
+                       newfdp->fd_knhashmask = 0;
+               }
+
                fpp = newfdp->fd_ofiles;
                flags = newfdp->fd_ofileflags;
                for (i = newfdp->fd_lastfile; i-- >= 0; fpp++, flags++)
@@ -1055,8 +1138,7 @@ fdcopy(p)
                                *fpp = NULL;
                                *flags = 0;
                        }
-       }
-       else
+       } else
                (void) memset(newfdp->fd_ofiles, 0, i * OFILESIZE);
 
        return (newfdp);
@@ -1069,29 +1151,98 @@ void
 fdfree(p)
        struct proc *p;
 {
-       register struct filedesc *fdp;
-       register struct file **fpp;
-       register int i;
+       struct filedesc *fdp;
+       struct file *fp;
+       int i;
+       struct vnode *tvp;
 
+       /* Certain daemons might not have file descriptors */
        if ((fdp = p->p_fd) == NULL)
                return;
+
        if (--fdp->fd_refcnt > 0)
                return;
-       p->p_fd = NULL;
+
+       /* Last reference: the structure can't change out from under us */
        if (fdp->fd_nfiles > 0) {
-               fpp = fdp->fd_ofiles;
-               for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
-                       if (*fpp)
-                               (void) closef(*fpp, p);
+               for (i = fdp->fd_lastfile; i >= 0; i--)
+#if 1  /* WORKAROUND */
+                       /*
+                        * Merlot: need to remove the bogus f_data check
+                        * from the following "if" statement.  It's there
+                        * because of the network/kernel funnel race on a
+                        * close of a socket vs. fdfree on exit.  See
+                        * Radar rdar://problem/3365650 for details, but
+                        * the short version is that the comment before the "if"
+                        * above is wrong under certain circumstances.
+                        *
+                        * We have to do this twice, in case knote_fdclose()
+                        * results in a block.
+                        *
+                        * This works because ffree() will set all fields
+                        * in the struct file to -1.
+                        */
+                       if ((fp = fdp->fd_ofiles[i]) != NULL &&
+                               fp->f_data != (caddr_t)-1) {
+                               if (i < fdp->fd_knlistsize)
+                                       knote_fdclose(p, i);
+                               if (fp->f_data != (caddr_t)-1)
+                                       (void) closef(fp, p);
+                       }
+#else  /* !WORKAROUND */
+                       if ((fp = fdp->fd_ofiles[i]) != NULL) {
+                               if (i < fdp->fd_knlistsize)
+                                       knote_fdclose(p, i);
+                               (void) closef(fp, p);
+                       }
+#endif /* !WORKAROUND */
                FREE_ZONE(fdp->fd_ofiles,
                                fdp->fd_nfiles * OFILESIZE, M_OFILETABL);
        }
-       vrele(fdp->fd_cdir);
-       if (fdp->fd_rdir)
-               vrele(fdp->fd_rdir);
+
+       tvp = fdp->fd_cdir;
+       fdp->fd_cdir = NULL;
+       vrele(tvp);
+
+       if (fdp->fd_rdir) {
+               tvp = fdp->fd_rdir;
+               fdp->fd_rdir = NULL;
+               vrele(tvp);
+       }
+
+       if (fdp->fd_knlist)
+               FREE(fdp->fd_knlist, M_KQUEUE);
+       if (fdp->fd_knhash)
+               FREE(fdp->fd_knhash, M_KQUEUE);
+
        FREE_ZONE(fdp, sizeof *fdp, M_FILEDESC);
 }
 
+static int
+closef_finish(fp, p)
+       register struct file *fp;
+       register struct proc *p;
+{
+       struct vnode *vp;
+       struct flock lf;
+       int error;
+
+       if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
+               lf.l_whence = SEEK_SET;
+               lf.l_start = 0;
+               lf.l_len = 0;
+               lf.l_type = F_UNLCK;
+               vp = (struct vnode *)fp->f_data;
+               (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
+       }
+       if (fp->f_ops)
+               error = fo_close(fp, p);
+       else
+               error = 0;
+       ffree(fp);
+       return (error);
+}
+
 /*
  * Internal form of close.
  * Decrement reference count on file structure.
@@ -1125,22 +1276,9 @@ closef(fp, p)
                vp = (struct vnode *)fp->f_data;
                (void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
        }
-       if (frele(fp) > 0)
+       if (frele_internal(fp) > 0)
                return (0);
-       if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
-               lf.l_whence = SEEK_SET;
-               lf.l_start = 0;
-               lf.l_len = 0;
-               lf.l_type = F_UNLCK;
-               vp = (struct vnode *)fp->f_data;
-               (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
-       }
-       if (fp->f_ops)
-               error = (*fp->f_ops->fo_close)(fp, p);
-       else
-               error = 0;
-       ffree(fp);
-       return (error);
+       return(closef_finish(fp, p));
 }
 
 /*
@@ -1167,6 +1305,7 @@ flock(p, uap, retval)
        struct vnode *vp;
        struct flock lf;
 
+       AUDIT_ARG(fd, uap->fd);
        if ((u_int)fd >= fdp->fd_nfiles ||
                        (fp = fdp->fd_ofiles[fd]) == NULL ||
                        (fdp->fd_ofileflags[fd] & UF_RESERVED))
@@ -1174,6 +1313,7 @@ flock(p, uap, retval)
        if (fp->f_type != DTYPE_VNODE)
                return (EOPNOTSUPP);
        vp = (struct vnode *)fp->f_data;
+       AUDIT_ARG(vnpath, vp, ARG_VNODE1);
        lf.l_whence = SEEK_SET;
        lf.l_start = 0;
        lf.l_len = 0;
@@ -1297,22 +1437,55 @@ dupfdopen(fdp, indx, dfd, mode, error)
 int
 fref(struct file *fp)
 {
+       if (fp->f_count == (short)0xffff)
+               return (-1);
        if (++fp->f_count <= 0)
                panic("fref: f_count");
        return ((int)fp->f_count);
 }
 
-int
-frele(struct file *fp)
+static int 
+frele_internal(struct file *fp)
 {
+       if (fp->f_count == (short)0xffff)
+               panic("frele: stale");
        if (--fp->f_count < 0)
                panic("frele: count < 0");
        return ((int)fp->f_count);
 }
 
+
+int
+frele(struct file *fp)
+{
+       int count;
+       funnel_t * fnl;
+       extern int disable_funnel;
+
+       fnl = thread_funnel_get();
+       /*
+        * If the funnels are merged then at least one funnel should be held
+        * else frele should come in with kernel funnel only
+        */
+       if (!disable_funnel && (fnl != kernel_flock)) {
+               panic("frele: kernel funnel not held");
+
+       } else if  (fnl == THR_FUNNEL_NULL) {
+               panic("frele: no funnel held");
+       }
+
+       if ((count = frele_internal(fp)) == 0) {
+               /* someone closed the fd while we were blocked */
+               (void)closef_finish(fp, current_proc());
+       }
+       return(count);
+}
+
 int
 fcount(struct file *fp)
 {
+       if (fp->f_count == (short)0xffff)
+               panic("fcount: stale");
        return ((int)fp->f_count);
 }