xnu-1504.15.3.tar.gz

[apple/xnu.git] / bsd / miscfs / devfs / devfs_tree.c
diff --git a/bsd/miscfs/devfs/devfs_tree.c b/bsd/miscfs/devfs/devfs_tree.c

index ffecc22d2c1c6884b4508142a104eb22fe8d6d93..58aea8eb97614230c48e13f297edcd4db1554dae 100644 (file)
--- a/bsd/miscfs/devfs/devfs_tree.c
+++ b/bsd/miscfs/devfs/devfs_tree.c
@@ -1,23 +1,29 @@
  /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
   *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
   * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
   * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
   */
  
  /*
@@ -47,6 +53,12 @@
   * 
   * devfs_tree.c
   */
+/*
+ * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
+ * support for mandatory and extensible security protections.  This notice
+ * is included in support of clause 2.2 (b) of the Apple Public License,
+ * Version 2.0.
+ */
  
  /*
   * HISTORY
@@ -84,17 +96,52 @@
  #include <sys/malloc.h>
  #include <sys/mount_internal.h>
  #include <sys/proc.h>
-#include <sys/vnode.h>
+#include <sys/vnode_internal.h>
  #include <stdarg.h>
-
+#include <libkern/OSAtomic.h>
+#define BSD_KERNEL_PRIVATE     1       /* devfs_make_link() prototype */
  #include "devfs.h"
  #include "devfsdefs.h"
  
-static void    devfs_release_busy(devnode_t *);
+#if CONFIG_MACF
+#include <security/mac_framework.h>
+#endif
+
+#if FDESC
+#include "fdesc.h"
+#endif
+
+/*
+ * One pending vnode notification: which vnode to notify, the vid it had
+ * when the event was recorded, and the event bits to deliver.
+ */
+typedef struct devfs_vnode_event {
+       vnode_t                 dve_vp;         /* vnode taken from the devnode's dn_vn */
+       uint32_t                dve_vid;        /* vnode_vid() of dve_vp at record time */
+       uint32_t                dve_events;     /* event bits handed to vnode_notify() */
+} *devfs_vnode_event_t;
+
+/* 
+ * Size of stack buffer (fast path) for notifications.  If 
+ * the number of mounts is small, no need to malloc a buffer.
+ */
+#define NUM_STACK_ENTRIES 5 
+
+/*
+ * Fixed-capacity log of pending vnode notifications.  Entries are
+ * appended by devfs_record_event() and delivered by devfs_bulk_notify().
+ */
+typedef struct devfs_event_log {
+       size_t                  del_max;        /* capacity of del_entries, in entries */
+       size_t                  del_used;       /* number of entries filled so far */
+       devfs_vnode_event_t     del_entries;    /* entry array: caller-supplied buffer or M_TEMP allocation */
+} *devfs_event_log_t;
+       
+
  static void    dev_free_hier(devdirent_t *);
-static int     devfs_propogate(devdirent_t *, devdirent_t *);
-static int     dev_finddir(char *, devnode_t *, int, devnode_t **);
+static int     devfs_propogate(devdirent_t *, devdirent_t *, devfs_event_log_t);
+static int     dev_finddir(const char *, devnode_t *, int, devnode_t **, devfs_event_log_t);
  static int     dev_dup_entry(devnode_t *, devdirent_t *, devdirent_t **, struct devfsmount *);
+void           devfs_ref_node(devnode_t *);
+void           devfs_rele_node(devnode_t *);
+static void    devfs_record_event(devfs_event_log_t, devnode_t*, uint32_t);
+static int     devfs_init_event_log(devfs_event_log_t, uint32_t, devfs_vnode_event_t);
+static void    devfs_release_event_log(devfs_event_log_t, int);
+static void    devfs_bulk_notify(devfs_event_log_t);
+static devdirent_t *devfs_make_node_internal(dev_t, devfstype_t type, uid_t, gid_t, int, 
+                       int (*clone)(dev_t dev, int action), const char *fmt, va_list ap);
  
  
  lck_grp_t      * devfs_lck_grp;
@@ -105,14 +152,17 @@ lck_mtx_t           devfs_mutex;
  devdirent_t *          dev_root = NULL;        /* root of backing tree */
  struct devfs_stats     devfs_stats;            /* hold stats */
  
+static ino_t           devfs_unique_fileno = 0;
+
  #ifdef HIDDEN_MOUNTPOINT
  static struct mount *devfs_hidden_mount;
  #endif /* HIDDEN_MOINTPOINT */
  
  static int devfs_ready = 0;
+static uint32_t devfs_nmountplanes = 0; /* The first plane is not used for a mount */
  
-#define NOCREATE       FALSE
-#define CREATE         TRUE
+#define DEVFS_NOCREATE FALSE
+#define DEVFS_CREATE   TRUE
  
  /*
   * Set up the root directory node in the backing plane
@@ -152,15 +202,26 @@ devfs_sinit(void)
         TAILQ_INIT(&devfs_hidden_mount->mnt_vnodelist);
         TAILQ_INIT(&devfs_hidden_mount->mnt_workerqueue);
         TAILQ_INIT(&devfs_hidden_mount->mnt_newvnodes);
+#if CONFIG_MACF
+       mac_mount_label_init(devfs_hidden_mount);
+       mac_mount_label_associate(vfs_context_kernel(), devfs_hidden_mount);
+#endif
  
         /* Initialize the default IO constraints */
         mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
         mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
+       mp->mnt_ioflags = 0;
+       mp->mnt_realrootvp = NULLVP;
+       mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
  
         devfs_mount(devfs_hidden_mount,"dummy",NULL,NULL,NULL);
         dev_root->de_dnp->dn_dvm 
             = (struct devfsmount *)devfs_hidden_mount->mnt_data;
  #endif /* HIDDEN_MOUNTPOINT */
+#if CONFIG_MACF
+       mac_devfs_label_associate_directory("/", strlen("/"),
+           dev_root->de_dnp, "/");
+#endif
         devfs_ready = 1;
         return (0);
  }
@@ -180,7 +241,7 @@ devfs_sinit(void)
   * called with DEVFS_LOCK held
   ***************************************************************/
  devdirent_t *
-dev_findname(devnode_t * dir, char *name)
+dev_findname(devnode_t * dir, const char *name)
  {
         devdirent_t * newfp;
         if (dir->dn_type != DEV_DIR) return 0;/*XXX*/ /* printf?*/
@@ -201,7 +262,7 @@ dev_findname(devnode_t * dir, char *name)
  
         while(newfp)
         {
-               if(!(strcmp(name,newfp->de_name)))
+               if(!(strncmp(name, newfp->de_name, sizeof(newfp->de_name))))
                         return newfp;
                 newfp = newfp->de_next;
         }
@@ -210,7 +271,7 @@ dev_findname(devnode_t * dir, char *name)
  
  /***********************************************************************
   * Given a starting node (0 for root) and a pathname, return the node  
- * for the end item on the path. It MUST BE A DIRECTORY. If the 'CREATE'
+ * for the end item on the path. It MUST BE A DIRECTORY. If the 'DEVFS_CREATE'
   * option is true, then create any missing nodes in the path and create
   * and return the final node as well.                                  
   * This is used to set up a directory, before making nodes in it..
@@ -218,14 +279,18 @@ dev_findname(devnode_t * dir, char *name)
   * called with DEVFS_LOCK held
   ***********************************************************************/
  static int
-dev_finddir(char * path, 
+dev_finddir(const char * path, 
             devnode_t * dirnode,
             int create, 
-           devnode_t * * dn_pp)
+           devnode_t * * dn_pp,
+           devfs_event_log_t delp)
  {
         devnode_t *     dnp = NULL;
         int             error = 0;
-       char *          scan;
+       const char *            scan;
+#if CONFIG_MACF
+       char            fullpath[DEVMAXPATHSIZE];
+#endif
  
  
         if (!dirnode) /* dirnode == NULL means start at root */
@@ -237,6 +302,9 @@ dev_finddir(char * path,
         if (strlen(path) > (DEVMAXPATHSIZE - 1)) 
             return ENAMETOOLONG;
  
+#if CONFIG_MACF
+       strlcpy (fullpath, path, DEVMAXPATHSIZE);
+#endif
         scan = path;
  
         while (*scan == '/') 
@@ -247,7 +315,7 @@ dev_finddir(char * path,
         while (1) {
             char                component[DEVMAXPATHSIZE];
             devdirent_t *       dirent_p;
-           char *              start;
+           const char *        start;
  
             if (*scan == 0) { 
                 /* we hit the end of the string, we're done */
@@ -258,8 +326,7 @@ dev_finddir(char * path,
             while (*scan != '/' && *scan)
                 scan++;
  
-           strncpy(component, start, scan - start);
-               component[ scan - start ] = '\0';
+           strlcpy(component, start, scan - start);
             if (*scan == '/')
                 scan++;
  
@@ -281,7 +348,13 @@ dev_finddir(char * path,
                 if (error)
                     break;
                 dnp = dirent_p->de_dnp;
-               devfs_propogate(dirnode->dn_typeinfo.Dir.myname, dirent_p);
+#if CONFIG_MACF
+               mac_devfs_label_associate_directory(
+                   dirnode->dn_typeinfo.Dir.myname->de_name, 
+                   strlen(dirnode->dn_typeinfo.Dir.myname->de_name),
+                   dnp, fullpath);
+#endif
+               devfs_propogate(dirnode->dn_typeinfo.Dir.myname, dirent_p, delp);
             }
             dirnode = dnp; /* continue relative to this directory */
         }
@@ -299,7 +372,7 @@ dev_finddir(char * path,
   * called with DEVFS_LOCK held
   ***********************************************************************/
  int
-dev_add_name(char * name, devnode_t * dirnode, __unused devdirent_t * back, 
+dev_add_name(const char * name, devnode_t * dirnode, __unused devdirent_t * back, 
      devnode_t * dnp, devdirent_t * *dirent_pp)
  {
         devdirent_t *   dirent_p = NULL;
@@ -384,7 +457,7 @@ dev_add_name(char * name, devnode_t * dirnode, __unused devdirent_t * back,
         /*
          * put the name into the directory entry.
          */
-       strcpy(dirent_p->de_name, name);
+       strlcpy(dirent_p->de_name, name, DEVMAXNAMESIZE);
  
  
         /*
@@ -418,7 +491,7 @@ dev_add_name(char * name, devnode_t * dirnode, __unused devdirent_t * back,
   * reused. (Is a DIR, or we select SPLIT_DEVS at compile time)
   * typeinfo gives us info to make our node if we don't have a prototype.
   * If typeinfo is null and proto exists, then the typeinfo field of
- * the proto is used intead in the CREATE case.
+ * the proto is used instead in the DEVFS_CREATE case.
   * note the 'links' count is 0 (except if a dir)
   * but it is only cleared on a transition
   * so this is ok till we link it to something
@@ -480,6 +553,10 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto,
                 *(dnp->dn_prevsiblingp) = dnp;
                 dnp->dn_nextsibling = proto;
                 proto->dn_prevsiblingp = &(dnp->dn_nextsibling);
+#if CONFIG_MACF
+               mac_devfs_label_init(dnp);
+               mac_devfs_label_copy(proto->dn_label, dnp->dn_label);
+#endif
         } else {
                 struct timeval tv;
  
@@ -494,8 +571,14 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto,
                 dnp->dn_atime.tv_sec = tv.tv_sec;
                 dnp->dn_mtime.tv_sec = tv.tv_sec;
                 dnp->dn_ctime.tv_sec = tv.tv_sec;
+#if CONFIG_MACF
+               mac_devfs_label_init(dnp);
+#endif
         }
         dnp->dn_dvm = dvm;
+       dnp->dn_refcount = 0;
+       dnp->dn_ino = devfs_unique_fileno;
+       devfs_unique_fileno++;
  
         /*
          * fill out the dev node according to type
@@ -536,9 +619,8 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto,
                         FREE(dnp,M_DEVFSNODE);
                         return ENOMEM;
                 }
-               strncpy(dnp->dn_typeinfo.Slnk.name, typeinfo->Slnk.name,
-                       typeinfo->Slnk.namelen);
-               dnp->dn_typeinfo.Slnk.name[typeinfo->Slnk.namelen] = '\0';
+               strlcpy(dnp->dn_typeinfo.Slnk.name, typeinfo->Slnk.name,
+                       typeinfo->Slnk.namelen + 1);
                 dnp->dn_typeinfo.Slnk.namelen = typeinfo->Slnk.namelen;
                 DEVFS_INCR_STRINGSPACE(dnp->dn_typeinfo.Slnk.namelen + 1);
                 dnp->dn_ops = &devfs_vnodeop_p;
@@ -553,6 +635,15 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto,
                 dnp->dn_ops = &devfs_spec_vnodeop_p;
                 dnp->dn_typeinfo.dev = typeinfo->dev;
                 break;
+
+       #if FDESC
+       /* /dev/fd is special */
+       case DEV_DEVFD:
+               dnp->dn_ops = &devfs_devfd_vnodeop_p;
+               dnp->dn_mode |= 0555;   /* default perms */
+               break;
+
+       #endif /* FDESC */
         default:
                 return EINVAL;
         }
@@ -569,10 +660,9 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto,
  void
  devnode_free(devnode_t * dnp)
  {
-    if (dnp->dn_lflags & DN_BUSY) {
-            dnp->dn_lflags |= DN_DELETE;
-           return;
-    }
+#if CONFIG_MACF
+       mac_devfs_label_destroy(dnp);
+#endif
      if (dnp->dn_type == DEV_SLNK) {
          DEVFS_DECR_STRINGSPACE(dnp->dn_typeinfo.Slnk.namelen + 1);
         FREE(dnp->dn_typeinfo.Slnk.name, M_DEVFSNODE);
@@ -597,11 +687,13 @@ devfs_dn_free(devnode_t * dnp)
                         dnp->dn_nextsibling->dn_prevsiblingp = prevp;
                         
                 }
-               if (dnp->dn_vn == NULL) {
-                   devnode_free(dnp); /* no accesses/references */
+
+               /* Can only free if there are no references; otherwise, wait for last vnode to be reclaimed */
+               if (dnp->dn_refcount == 0) {
+                   devnode_free(dnp); 
                 }
                 else {
-                   dnp->dn_delete = TRUE;
+                   dnp->dn_lflags |= DN_DELETE;
                 }
         }
  }
@@ -626,7 +718,7 @@ devfs_dn_free(devnode_t * dnp)
   * called with DEVFS_LOCK held
   ***********************************************************************/
  static int
-devfs_propogate(devdirent_t * parent,devdirent_t * child)
+devfs_propogate(devdirent_t * parent,devdirent_t * child, devfs_event_log_t delp)
  {
         int     error;
         devdirent_t * newnmp;
@@ -634,6 +726,12 @@ devfs_propogate(devdirent_t * parent,devdirent_t * child)
         devnode_t *     pdnp = parent->de_dnp;
         devnode_t *     adnp = parent->de_dnp;
         int type = child->de_dnp->dn_type;
+       uint32_t events;
+       
+       events = (dnp->dn_type == DEV_DIR ? VNODE_EVENT_DIR_CREATED : VNODE_EVENT_FILE_CREATED);
+       if (delp != NULL) {
+               devfs_record_event(delp, pdnp, events);
+       }
  
         /***********************************************
          * Find the other instances of the parent node
@@ -651,11 +749,45 @@ devfs_propogate(devdirent_t * parent,devdirent_t * child)
                                            NULL, dnp, adnp->dn_dvm, 
                                            &newnmp)) != 0) {
                         printf("duplicating %s failed\n",child->de_name);
+               } else {
+                       if (delp != NULL) {
+                               devfs_record_event(delp, adnp, events);
+
+                               /* 
+                                * Slightly subtle.  We're guaranteed that there will
+                                * only be a vnode hooked into this devnode if we're creating
+                                * a new link to an existing node; otherwise, the devnode is new
+                                * and no one can have looked it up yet. If we're making a link,
+                                * then the buffer is large enough for two nodes in each 
+                                * plane; otherwise, there's no vnode and this call will
+                                * do nothing.
+                                */
+                               devfs_record_event(delp, newnmp->de_dnp, VNODE_EVENT_LINK);
+                       }
                 }
         }
         return 0;       /* for now always succeed */
  }
  
+/*
+ * Compute an upper bound on the number of notification slots needed to
+ * remove dnp: one per mount plane (one removed node on each mount) plus
+ * one per link held by dnp and by every other node on its sibling ring
+ * (one parent to notify for each such link).
+ */
+static uint32_t
+remove_notify_count(devnode_t *dnp)
+{
+       uint32_t total = devfs_nmountplanes;
+       devnode_t *sibling = dnp->dn_nextsibling;
+
+       total += dnp->dn_links;
+
+       /* The sibling list is circular; stop once we are back at dnp. */
+       while (sibling != dnp) {
+               total += sibling->dn_links;
+               sibling = sibling->dn_nextsibling;
+       }
+
+       return total;
+}
  
  /***********************************************************************
   * remove all instances of this devicename [for backing nodes..]
@@ -673,7 +805,12 @@ devfs_remove(void *dirent_p)
         devnode_t * dnp = ((devdirent_t *)dirent_p)->de_dnp;
         devnode_t * dnp2;
         boolean_t   lastlink;
-
+       struct devfs_event_log event_log;
+       uint32_t    log_count = 0;
+       int         do_notify = 0;
+       int         need_free = 0;
+       struct devfs_vnode_event stackbuf[NUM_STACK_ENTRIES];
+       
         DEVFS_LOCK();
  
         if (!devfs_ready) {
@@ -681,6 +818,37 @@ devfs_remove(void *dirent_p)
                 goto out;
         }
  
+       log_count = remove_notify_count(dnp);
+
+       if (log_count > NUM_STACK_ENTRIES) {
+               uint32_t new_count;
+wrongsize:
+               DEVFS_UNLOCK();
+               if (devfs_init_event_log(&event_log, log_count, NULL) == 0) {
+                       do_notify = 1;
+                       need_free = 1;
+               }       
+               DEVFS_LOCK();
+
+               new_count = remove_notify_count(dnp);
+               if (need_free && (new_count > log_count)) {
+                       devfs_release_event_log(&event_log, 1);
+                       need_free = 0;
+                       do_notify = 0;
+                       log_count = log_count * 2;
+                       goto wrongsize;
+               }
+       } else {
+               if (devfs_init_event_log(&event_log, NUM_STACK_ENTRIES, &stackbuf[0]) == 0) {
+                       do_notify = 1;
+               }
+       }
+
+       /* This file has been deleted */
+       if (do_notify != 0) {
+               devfs_record_event(&event_log, dnp, VNODE_EVENT_DELETE);
+       }
+
         /* keep removing the next sibling till only we exist. */
         while ((dnp2 = dnp->dn_nextsibling) != dnp) {
  
@@ -691,9 +859,19 @@ devfs_remove(void *dirent_p)
                 dnp->dn_nextsibling->dn_prevsiblingp = &(dnp->dn_nextsibling);
                 dnp2->dn_nextsibling = dnp2;
                 dnp2->dn_prevsiblingp = &(dnp2->dn_nextsibling);
+                               
+               /* This file has been deleted in this plane */
+               if (do_notify != 0) {
+                       devfs_record_event(&event_log, dnp2, VNODE_EVENT_DELETE);
+               }
+
                 if (dnp2->dn_linklist) {
                         do {
                                 lastlink = (1 == dnp2->dn_links);
+                               /* Each parent of a link to this file has lost a child in this plane */
+                               if (do_notify != 0) {
+                                       devfs_record_event(&event_log, dnp2->dn_linklist->de_parent, VNODE_EVENT_FILE_REMOVED);
+                               }
                                 dev_free_name(dnp2->dn_linklist);
                         } while (!lastlink);
                 }
@@ -707,11 +885,19 @@ devfs_remove(void *dirent_p)
         if (dnp->dn_linklist) {
                 do {
                         lastlink = (1 == dnp->dn_links);
+                       /* Each parent of a link to this file has lost a child */
+                       if (do_notify != 0) {
+                               devfs_record_event(&event_log, dnp->dn_linklist->de_parent, VNODE_EVENT_FILE_REMOVED);
+                       }
                         dev_free_name(dnp->dn_linklist);
                 } while (!lastlink);
         }
  out:
         DEVFS_UNLOCK();
+       if (do_notify != 0) {
+               devfs_bulk_notify(&event_log);
+               devfs_release_event_log(&event_log, need_free);
+       }
  
         return ;
  }
@@ -735,6 +921,7 @@ dev_dup_plane(struct devfsmount *devfs_mp_p)
         if ((error = dev_dup_entry(NULL, dev_root, &new, devfs_mp_p)))
                 return error;
         devfs_mp_p->plane_root = new;
+       devfs_nmountplanes++;
         return error;
  }
  
@@ -756,6 +943,11 @@ devfs_free_plane(struct devfsmount *devfs_mp_p)
                 dev_free_name(dirent_p);
         }
         devfs_mp_p->plane_root = NULL;
+       devfs_nmountplanes--;
+
+       if (devfs_nmountplanes > (devfs_nmountplanes+1)) {
+               panic("plane count wrapped around.\n");
+       }
  }
  
  
@@ -858,12 +1050,12 @@ dev_free_name(devdirent_t * dirent_p)
                         if(dnp->dn_linklist == dirent_p) {
                                 dnp->dn_linklist = dirent_p->de_nextlink;
                         }
-                       dirent_p->de_nextlink->de_prevlinkp 
-                           = dirent_p->de_prevlinkp;
-                       *dirent_p->de_prevlinkp = dirent_p->de_nextlink;
                 }
                 devfs_dn_free(dnp);
         }
+       
+       dirent_p->de_nextlink->de_prevlinkp = dirent_p->de_prevlinkp;
+       *(dirent_p->de_prevlinkp) = dirent_p->de_nextlink;
  
         /*
          * unlink ourselves from the directory on this plane
@@ -922,7 +1114,11 @@ dev_free_hier(devdirent_t * dirent_p)
   * associated, or get a new one and associate it with the dev_node
   *
   * called with DEVFS_LOCK held
- ***************************************************************/
+ *
+ * If an error is returned, then the dnp may have been freed (we
+ * raced with a delete and lost).  A devnode should not be accessed
+ * after devfs_dntovn() fails.
+ ****************************************************************/
  int
  devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, __unused struct proc * p)
  {
@@ -931,13 +1127,25 @@ devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, __unused struct proc * p)
         struct vnode_fsparam vfsp;
         enum vtype vtype = 0;
         int markroot = 0;
+       int n_minor = DEVFS_CLONE_ALLOC; /* new minor number for clone device */
+       
+       /*
+        * We should never come in and find that our devnode has been marked for delete.
+        * The lookup should have held the lock from entry until now; it should not have
+        * been able to find a removed entry. Any other pathway would have just created
+        * the devnode and come here without dropping the devfs lock, so no one would
+        * have a chance to delete.
+        */
+       if (dnp->dn_lflags & DN_DELETE) {
+               panic("devfs_dntovn: DN_DELETE set on a devnode upon entry.");
+       }
+
+       devfs_ref_node(dnp);
  
  retry:
         *vn_pp = NULL;
         vn_p = dnp->dn_vn;
  
-       dnp->dn_lflags |= DN_BUSY;
-
         if (vn_p) { /* already has a vnode */
                 uint32_t vid;
                 
@@ -962,21 +1170,26 @@ retry:
                                  */
                                 vnode_put(vn_p);
                         }
-                       /*
-                        * set the error to EAGAIN
-                        * which will cause devfs_lookup
-                        * to retry this node
+                       
+                       /* 
+                        * This entry is no longer in the namespace.  This is only 
+                        * possible for lookup: no other path would not find an existing
+                        * vnode.  Therefore, ENOENT is a valid result.
                          */
-                       error = EAGAIN;
+                       error = ENOENT;
                 }
                 if ( !error)
                         *vn_pp = vn_p;
  
-               devfs_release_busy(dnp);
-
-               return error;
+               goto out;
         }
  
+       /* 
+        * If we get here, then we've beaten any deletes; 
+        * if someone sets DN_DELETE during a subsequent drop
+        * of the devfs lock, we'll still vend a vnode.
+        */
+
         if (dnp->dn_lflags & DN_CREATE) {
                 dnp->dn_lflags |= DN_CREATEWAIT;
                 msleep(&dnp->dn_lflags, &devfs_mutex, PRIBIO, 0 , 0);
@@ -999,6 +1212,11 @@ retry:
                 case    DEV_CDEV:
                         vtype = (dnp->dn_type == DEV_BDEV) ? VBLK : VCHR;
                         break;
+#if FDESC
+               case    DEV_DEVFD:
+                       vtype = VDIR;
+                       break;
+#endif /* FDESC */
         }
         vfsp.vnfs_mp = dnp->dn_dvm->mount;
         vfsp.vnfs_vtype = vtype;
@@ -1008,10 +1226,28 @@ retry:
         vfsp.vnfs_cnp = 0;
         vfsp.vnfs_vops = *(dnp->dn_ops);
                 
-       if (vtype == VBLK || vtype == VCHR)
+       if (vtype == VBLK || vtype == VCHR) {
+               /*
+                * Ask the clone minor number function for a new minor number
+                * to use for the next device instance.  If an administrative
+                * limit has been reached, this function will return -1.
+                */
+               if (dnp->dn_clone != NULL) {
+                       int     n_major = major(dnp->dn_typeinfo.dev);
+
+                       n_minor = (*dnp->dn_clone)(dnp->dn_typeinfo.dev, DEVFS_CLONE_ALLOC);
+                       if (n_minor == -1) {
+                               error = ENOMEM;
+                               goto out;
+                       }
+
+                       vfsp.vnfs_rdev = makedev(n_major, n_minor);;
+               } else {
                 vfsp.vnfs_rdev = dnp->dn_typeinfo.dev;
-       else
+               }
+       } else {
                 vfsp.vnfs_rdev = 0;
+       }
         vfsp.vnfs_filesize = 0;
         vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE;
         /* Tag system files */
@@ -1021,42 +1257,91 @@ retry:
         DEVFS_UNLOCK();
  
         error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vn_p);
+       
+       /* Do this before grabbing the lock */
+       if (error == 0) {
+               vnode_setneedinactive(vn_p);
+       }
  
         DEVFS_LOCK();
  
         if (error == 0) {
-               if ((dnp->dn_vn)) {
-                       panic("devnode already has a vnode?");
-               } else {
-                       dnp->dn_vn = vn_p;
-                       *vn_pp = vn_p;
                         vnode_settag(vn_p, VT_DEVFS);
-               }
+
+                       if ((dnp->dn_clone != NULL) && (dnp->dn_vn != NULLVP) )
+                               panic("devfs_dntovn: cloning device with a vnode?\n");
+
+                       *vn_pp = vn_p;
+
+                       /* 
+                        * Another vnode that has this devnode as its v_data.
+                        * This reference, unlike the one taken at the start
+                        * of the function, persists until a VNOP_RECLAIM
+                        * comes through for this vnode.
+                        */
+                       devfs_ref_node(dnp);
+
+                       /* 
+                        * A cloned vnode is not hooked into the devnode; every lookup
+                        * gets a new vnode.
+                        */
+                       if (dnp->dn_clone == NULL) {
+                               dnp->dn_vn = vn_p;
+                       } 
+       } else if (n_minor != DEVFS_CLONE_ALLOC) {
+               /*
+                * If we failed the create, we need to release the cloned minor
+                * back to the free list.  In general, this is only useful if
+                * the clone function results in a state change in the cloned
+                * device for which the minor number was obtained.  If we get
+                * past this point without falling into this case, it's
+                * assumed that any state to be released will be released when
+                * the vnode is dropped, instead.
+                */
+                (void)(*dnp->dn_clone)(dnp->dn_typeinfo.dev, DEVFS_CLONE_FREE);
         }
  
         dnp->dn_lflags &= ~DN_CREATE;
-
         if (dnp->dn_lflags & DN_CREATEWAIT) {
                 dnp->dn_lflags &= ~DN_CREATEWAIT;
                 wakeup(&dnp->dn_lflags);
         }
  
-       devfs_release_busy(dnp);
+out:
+       /* 
+        * Release the reference we took to prevent deletion while we weren't holding the lock.
+        * If not returning success, then dropping this reference could delete the devnode;
+        * no one should access a devnode after a call to devfs_dntovn fails.
+        */
+       devfs_rele_node(dnp);
  
         return error;
  }
  
+/*
+ * Take one reference on a devnode.  While the count is non-zero,
+ * devfs_dn_free() only marks the node DN_DELETE instead of freeing it,
+ * so the node stays valid across a window in which the devfs lock is
+ * dropped.  Pair each call with a devfs_rele_node().
+ */
+void
+devfs_ref_node(devnode_t *dnp)
+{
+       dnp->dn_refcount += 1;
+}
  
-/***********************************************************************
- * called with DEVFS_LOCK held
- ***********************************************************************/
-static void
-devfs_release_busy(devnode_t *dnp) {
-
-        dnp->dn_lflags &= ~DN_BUSY;
+/*
+ * Release a reference on a devnode.  If the devnode is marked for 
+ * free and the refcount is dropped to zero, do the free.
+ *
+ * The count is decremented first; a negative result panics.  When the
+ * count reaches zero and DN_DELETE is set in dn_lflags, the node is
+ * handed to devnode_free(), so a caller that may be dropping the last
+ * reference must not touch dnp after this call returns.
+ */
+void 
+devfs_rele_node(devnode_t *dnp)
+{
+       dnp->dn_refcount--;
+       if (dnp->dn_refcount < 0) {
+               panic("devfs_rele_node: devnode with a negative refcount!\n");
+       } else if ((dnp->dn_refcount == 0) && (dnp->dn_lflags & DN_DELETE))  {
+               devnode_free(dnp);
+       }
  
-       if (dnp->dn_lflags & DN_DELETE)
-               devnode_free(dnp);
  }
  
  /***********************************************************************
@@ -1066,7 +1351,7 @@ devfs_release_busy(devnode_t *dnp) {
   * called with DEVFS_LOCK held
   ***********************************************************************/
  int
-dev_add_entry(char *name, devnode_t * parent, int type, devnode_type_t * typeinfo,
+dev_add_entry(const char *name, devnode_t * parent, int type, devnode_type_t * typeinfo,
               devnode_t * proto, struct devfsmount *dvm, devdirent_t * *nm_pp)
  {
         devnode_t *     dnp;
@@ -1089,6 +1374,69 @@ dev_add_entry(char *name, devnode_t * parent, int type, devnode_type_t * typeinf
         return error;
  }
  
+static void
+devfs_bulk_notify(devfs_event_log_t delp)
+{
+       /* Notify each logged vnode, but only if vnode_getwithvid() accepts its logged vid */
+       devfs_vnode_event_t ev = delp->del_entries;
+       for (; ev < delp->del_entries + delp->del_used; ev++) {
+               if (vnode_getwithvid(ev->dve_vp, ev->dve_vid) == 0) {
+                       vnode_notify(ev->dve_vp, ev->dve_events, NULL);
+                       vnode_put(ev->dve_vp);
+               }
+       }
+}
+
+/* Log `events` for dnp's vnode if it has one that vnode_ismonitored() accepts; panics if the log is full on entry. */
+static void
+devfs_record_event(devfs_event_log_t delp, devnode_t *dnp, uint32_t events)
+{
+       devfs_vnode_event_t slot;
+       if (delp->del_used >= delp->del_max) {
+               panic("devfs event log overflowed.\n");
+       }
+       if (dnp->dn_vn == NULLVP || !vnode_ismonitored(dnp->dn_vn)) {
+               return;
+       }
+       slot = &delp->del_entries[delp->del_used++];
+       slot->dve_vp = dnp->dn_vn;
+       slot->dve_vid = vnode_vid(dnp->dn_vn);
+       slot->dve_events = events;
+}
+
+/*
+ * Prepare an empty event log with room for `count` entries.  Uses buf as the
+ * entry array, or allocates a zeroed one if buf is NULL (ENOMEM on failure).
+ */
+static int
+devfs_init_event_log(devfs_event_log_t delp, uint32_t count, devfs_vnode_event_t buf)
+{
+       devfs_vnode_event_t slots = buf;
+       if (slots == NULL) {
+               MALLOC(slots, devfs_vnode_event_t, count * sizeof(struct devfs_vnode_event), M_TEMP, M_WAITOK | M_ZERO);
+               if (slots == NULL) {
+                       return ENOMEM;
+               }
+       }
+       delp->del_entries = slots;
+       delp->del_used = 0;
+       delp->del_max = count;
+       return 0;
+}
+
+/* Tear down an initialized event log; the entry array is freed only when need_free is non-zero. */
+static void
+devfs_release_event_log(devfs_event_log_t delp, int need_free)
+{
+       devfs_vnode_event_t slots = delp->del_entries;
+       if (slots == NULL) {
+               panic("Free of devfs notify info that has not been intialized.\n");
+       }
+       delp->del_entries = NULL;
+       if (need_free != 0) {
+               FREE(slots, M_TEMP);
+       }
+}
  
  /*
   * Function: devfs_make_node
@@ -1101,38 +1449,100 @@ dev_add_entry(char *name, devnode_t * parent, int type, devnode_type_t * typeinf
   *   chrblk    - block or character device (DEVFS_CHAR or DEVFS_BLOCK)
   *   uid, gid  - ownership
   *   perms     - permissions
+ *   clone     - minor number cloning function
   *   fmt, ...  - path format string with printf args to format the path name
   * Returns:
   *   A handle to a device node if successful, NULL otherwise.
   */
  void *
-devfs_make_node(dev_t dev, int chrblk, uid_t uid,
-               gid_t gid, int perms, const char *fmt, ...)
+devfs_make_node_clone(dev_t dev, int chrblk, uid_t uid,
+               gid_t gid, int perms, int (*clone)(dev_t dev, int action),
+               const char *fmt, ...)
  {
         devdirent_t *   new_dev = NULL;
-       devnode_t *     dnp;    /* devnode for parent directory */
-       devnode_type_t  typeinfo;
-
-       char *name, *path, buf[256]; /* XXX */
-       int i;
+       devfstype_t     type;   /* node type derived from chrblk */
         va_list ap;
  
+       switch (chrblk) {       /* translate DEVFS_CHAR/DEVFS_BLOCK to DEV_CDEV/DEV_BDEV */
+               case DEVFS_CHAR:
+                       type = DEV_CDEV;
+                       break;
+               case DEVFS_BLOCK:
+                       type = DEV_BDEV;
+                       break;
+               default:
+                       goto out;       /* any other chrblk: return NULL without creating anything */
+       }
  
-       DEVFS_LOCK();
+       va_start(ap, fmt);      /* fmt/ap are handed to the internal helper, which formats the path */
+       new_dev = devfs_make_node_internal(dev, type, uid, gid, perms, clone, fmt, ap);
+       va_end(ap);
+out:
+       return new_dev;         /* still NULL unless the internal helper returned a node */
+}
+
+
+/*
+ * Function: devfs_make_node
+ *
+ * Purpose:
+ *   Create a device node with the given pathname in the devfs namespace.
+ *   No clone function is attached (NULL is passed to the internal helper).
+ * Parameters:
+ *   dev       - the dev_t value to associate
+ *   chrblk    - block or character device (DEVFS_CHAR or DEVFS_BLOCK)
+ *   uid, gid  - ownership
+ *   perms     - permissions
+ *   fmt, ...  - path format string with printf args to format the path name
+ * Returns:
+ *   A handle to the device node, or NULL on failure or if chrblk is invalid.
+ */
+void *
+devfs_make_node(dev_t dev, int chrblk, uid_t uid,
+               gid_t gid, int perms, const char *fmt, ...)
+{
+       devdirent_t *   new_dev = NULL;
+       devfstype_t type;       /* node type derived from chrblk */
+       va_list ap;
  
-       if (!devfs_ready) {
-               printf("devfs_make_node: not ready for devices!\n");
-               goto out;
-       }
         if (chrblk != DEVFS_CHAR && chrblk != DEVFS_BLOCK)
                 goto out;
  
-       DEVFS_UNLOCK();
+       type = (chrblk == DEVFS_BLOCK ? DEV_BDEV : DEV_CDEV);   /* chrblk was validated above, so non-BLOCK means CHAR */
  
         va_start(ap, fmt);
-       vsnprintf(buf, sizeof(buf), fmt, ap);
+       new_dev = devfs_make_node_internal(dev, type, uid, gid, perms, NULL, fmt, ap);
         va_end(ap);
+       
+out:
+       return new_dev;         /* still NULL unless the internal helper returned a node */
+}
+
+static devdirent_t *
+devfs_make_node_internal(dev_t dev, devfstype_t type, uid_t uid, 
+               gid_t gid, int perms, int (*clone)(dev_t dev, int action), const char *fmt, va_list ap)
+{
+       devdirent_t *   new_dev = NULL;
+       devnode_t * dnp;
+       devnode_type_t  typeinfo;
+
+       char            *name, buf[256]; /* XXX */
+       const char      *path;
+#if CONFIG_MACF
+       char buff[sizeof(buf)];
+#endif
+       int             i;
+       uint32_t        log_count;
+       struct devfs_event_log event_log;
+       struct devfs_vnode_event stackbuf[NUM_STACK_ENTRIES];
+       int             need_free = 0;
+
+       vsnprintf(buf, sizeof(buf), fmt, ap);
  
+#if CONFIG_MACF
+       bcopy(buf, buff, sizeof(buff));
+       buff[sizeof(buff)-1] = 0;
+#endif
         name = NULL;
  
         for(i=strlen(buf); i>0; i--)
@@ -1149,23 +1559,55 @@ devfs_make_node(dev_t dev, int chrblk, uid_t uid,
                 name = buf;
                 path = "/";
         }
+
+       log_count = devfs_nmountplanes;
+       if (log_count > NUM_STACK_ENTRIES) {
+wrongsize:
+               need_free = 1;
+               if (devfs_init_event_log(&event_log, log_count, NULL) != 0) {
+                       return NULL;
+               }
+       } else {
+               need_free = 0;
+               log_count = NUM_STACK_ENTRIES;
+               if (devfs_init_event_log(&event_log, log_count, &stackbuf[0]) != 0) {
+                       return NULL;
+               }
+       }
+
         DEVFS_LOCK();
+       if (log_count < devfs_nmountplanes) {
+               DEVFS_UNLOCK();
+               devfs_release_event_log(&event_log, need_free);
+               log_count = log_count * 2;
+               goto wrongsize;
+       }
+       
+       if (!devfs_ready) {
+               printf("devfs_make_node: not ready for devices!\n");
+               goto out;
+       }
  
         /* find/create directory path ie. mkdir -p */
-       if (dev_finddir(path, NULL, CREATE, &dnp) == 0) {
+       if (dev_finddir(path, NULL, DEVFS_CREATE, &dnp, &event_log) == 0) {
             typeinfo.dev = dev;
-           if (dev_add_entry(name, dnp, 
-                             (chrblk == DEVFS_CHAR) ? DEV_CDEV : DEV_BDEV, 
-                             &typeinfo, NULL, NULL, &new_dev) == 0) {
+           if (dev_add_entry(name, dnp, type, &typeinfo, NULL, NULL, &new_dev) == 0) {
                 new_dev->de_dnp->dn_gid = gid;
                 new_dev->de_dnp->dn_uid = uid;
                 new_dev->de_dnp->dn_mode |= perms;
-               devfs_propogate(dnp->dn_typeinfo.Dir.myname, new_dev);
+               new_dev->de_dnp->dn_clone = clone;
+#if CONFIG_MACF
+               mac_devfs_label_associate_device(dev, new_dev->de_dnp, buff);
+#endif
+               devfs_propogate(dnp->dn_typeinfo.Dir.myname, new_dev, &event_log);
             }
         }
+
  out:
         DEVFS_UNLOCK();
  
+       devfs_bulk_notify(&event_log);
+       devfs_release_event_log(&event_log, need_free);
         return new_dev;
  }
  
@@ -1184,6 +1626,8 @@ devfs_make_link(void *original, char *fmt, ...)
         devdirent_t *   new_dev = NULL;
         devdirent_t *   orig = (devdirent_t *) original;
         devnode_t *     dirnode;        /* devnode for parent directory */
+       struct devfs_event_log event_log;
+       uint32_t        log_count;
  
         va_list ap;
         char *p, buf[256]; /* XXX */
@@ -1192,8 +1636,9 @@ devfs_make_link(void *original, char *fmt, ...)
         DEVFS_LOCK();
  
         if (!devfs_ready) {
+               DEVFS_UNLOCK();
                 printf("devfs_make_link: not ready for devices!\n");
-               goto out;
+               return -1;
         }
         DEVFS_UNLOCK();
  
@@ -1210,24 +1655,43 @@ devfs_make_link(void *original, char *fmt, ...)
                                 break;
                 }
         }
+       
+       /* 
+        * One slot for each directory, one for each devnode 
+        * whose link count changes 
+        */
+       log_count = devfs_nmountplanes * 2;
+wrongsize:
+       if (devfs_init_event_log(&event_log, log_count, NULL) != 0) {
+               /* No lock held, no allocations done, can just return */
+               return -1;
+       }
+
         DEVFS_LOCK();
  
+       if (log_count < devfs_nmountplanes) {
+               DEVFS_UNLOCK();
+               devfs_release_event_log(&event_log, 1);
+               log_count = log_count * 2;
+               goto wrongsize;
+       }
+
         if (p) {
                 *p++ = '\0';
  
-               if (dev_finddir(buf, NULL, CREATE, &dirnode)
+               if (dev_finddir(buf, NULL, DEVFS_CREATE, &dirnode, &event_log)
                     || dev_add_name(p, dirnode, NULL, orig->de_dnp, &new_dev))
                         goto fail;
         } else {
-               if (dev_finddir("", NULL, CREATE, &dirnode)
+               if (dev_finddir("", NULL, DEVFS_CREATE, &dirnode, &event_log)
                     || dev_add_name(buf, dirnode, NULL, orig->de_dnp, &new_dev))
                         goto fail;
         }
-       devfs_propogate(dirnode->dn_typeinfo.Dir.myname, new_dev);
+       devfs_propogate(dirnode->dn_typeinfo.Dir.myname, new_dev, &event_log);
  fail:
-out:
         DEVFS_UNLOCK();
+       devfs_bulk_notify(&event_log);
+       devfs_release_event_log(&event_log, 1);
  
         return ((new_dev != NULL) ? 0 : -1);
  }
-