X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/9bccf70c0258c7cac2dcb80011b2a964d884c552..f427ee49d309d8fc33ebf3042c3a775f2f530ded:/bsd/miscfs/devfs/devfs_tree.c diff --git a/bsd/miscfs/devfs/devfs_tree.c b/bsd/miscfs/devfs/devfs_tree.c index 9707f8a9c..21a0ac5c0 100644 --- a/bsd/miscfs/devfs/devfs_tree.c +++ b/bsd/miscfs/devfs/devfs_tree.c @@ -1,29 +1,35 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * Copyright 1997,1998 Julian Elischer. All rights reserved. * julian@freebsd.org - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: @@ -32,7 +38,7 @@ * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. - * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE @@ -44,9 +50,15 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * + * * devfs_tree.c */ +/* + * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. + */ /* * HISTORY @@ -64,7 +76,7 @@ * vnode instead of the existing one that has the mounted_here * field filled in; the net effect was that the filesystem mounted * on top of us would never show up - * - added devfs_stats to store how many data structures are actually + * - added devfs_stats to store how many data structures are actually * allocated */ @@ -82,26 +94,79 @@ #include #include #include -#include +#include #include -#include +#include #include - +#include +#include +#define BSD_KERNEL_PRIVATE 1 /* devfs_make_link() prototype */ #include "devfs.h" #include "devfsdefs.h" -struct lock__bsd__ devfs_lock; /* the "big switch" */ -devdirent_t * dev_root = NULL; /* root of backing tree */ -struct devfs_stats devfs_stats; /* hold stats */ +#if CONFIG_MACF +#include +#endif + +#if FDESC +#include "fdesc.h" +#endif + +typedef struct devfs_vnode_event { + vnode_t dve_vp; + uint32_t dve_vid; + uint32_t dve_events; +} *devfs_vnode_event_t; + +/* + * Size of stack buffer (fast path) for notifications. If + * the number of mounts is small, no need to malloc a buffer. + */ +#define NUM_STACK_ENTRIES 5 + +typedef struct devfs_event_log { + size_t del_max; + size_t del_used; + devfs_vnode_event_t del_entries; +} *devfs_event_log_t; + + +static void dev_free_hier(devdirent_t *); +static int devfs_propogate(devdirent_t *, devdirent_t *, devfs_event_log_t); +static int dev_finddir(const char *, devnode_t *, int, devnode_t **, devfs_event_log_t); +static int dev_dup_entry(devnode_t *, devdirent_t *, devdirent_t **, struct devfsmount *); +void devfs_ref_node(devnode_t *); +void devfs_rele_node(devnode_t *); +static void devfs_record_event(devfs_event_log_t, devnode_t*, uint32_t); +static int devfs_init_event_log(devfs_event_log_t, uint32_t, devfs_vnode_event_t); +static void devfs_release_event_log(devfs_event_log_t, int); +static void devfs_bulk_notify(devfs_event_log_t); +static devdirent_t *devfs_make_node_internal(dev_t, devfstype_t type, uid_t, gid_t, int, + int (*clone)(dev_t dev, int action), const char *fmt, va_list ap); + + +lck_grp_t * devfs_lck_grp; +lck_grp_attr_t * devfs_lck_grp_attr; +lck_attr_t * devfs_lck_attr; +lck_mtx_t devfs_mutex; +lck_mtx_t devfs_attr_mutex; + +os_refgrp_decl(static, devfs_refgrp, "devfs", NULL); + +devdirent_t * dev_root = NULL; /* root of backing tree */ +struct devfs_stats devfs_stats; /* hold stats */ + +static ino_t devfs_unique_fileno = 0; #ifdef HIDDEN_MOUNTPOINT static struct mount *devfs_hidden_mount; -#endif HIDDEN_MOINTPOINT +#endif /* HIDDEN_MOINTPOINT */ static int devfs_ready = 0; +static uint32_t devfs_nmountplanes = 0; /* The first plane is not used for a mount */ -#define NOCREATE FALSE -#define CREATE TRUE +#define DEVFS_NOCREATE FALSE +#define DEVFS_CREATE TRUE /* * Set up the root directory node in the backing plane @@ -116,27 +181,52 @@ static int devfs_ready = 0; int devfs_sinit(void) { - lockinit(&devfs_lock, PINOD, "devfs", 0, 0); - if (dev_add_entry("root", NULL, DEV_DIR, NULL, NULL, NULL, - &dev_root)) { - printf("devfs_sinit: dev_add_entry failed "); - return (EOPNOTSUPP); + int error; + + devfs_lck_grp_attr = lck_grp_attr_alloc_init(); + devfs_lck_grp = lck_grp_alloc_init("devfs_lock", devfs_lck_grp_attr); + + devfs_lck_attr = lck_attr_alloc_init(); + + lck_mtx_init(&devfs_mutex, devfs_lck_grp, devfs_lck_attr); + lck_mtx_init(&devfs_attr_mutex, devfs_lck_grp, devfs_lck_attr); + + DEVFS_LOCK(); + error = dev_add_entry("root", NULL, DEV_DIR, NULL, NULL, NULL, &dev_root); + DEVFS_UNLOCK(); + + if (error) { + printf("devfs_sinit: dev_add_entry failed "); + return ENOTSUP; } #ifdef HIDDEN_MOUNTPOINT - MALLOC(devfs_hidden_mount, struct mount *, sizeof(struct mount), - M_MOUNT, M_WAITOK); - bzero(devfs_hidden_mount,sizeof(struct mount)); - - /* Initialize the default IO constraints */ - mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; - mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; - - devfs_mount(devfs_hidden_mount,"dummy",NULL,NULL,NULL); - dev_root->de_dnp->dn_dvm - = (struct devfsmount *)devfs_hidden_mount->mnt_data; -#endif HIDDEN_MOUNTPOINT + devfs_hidden_mount = zalloc_flags(mount_zone, Z_WAITOK | Z_ZERO); + mount_lock_init(devfs_hidden_mount); + TAILQ_INIT(&devfs_hidden_mount->mnt_vnodelist); + TAILQ_INIT(&devfs_hidden_mount->mnt_workerqueue); + TAILQ_INIT(&devfs_hidden_mount->mnt_newvnodes); +#if CONFIG_MACF + mac_mount_label_init(devfs_hidden_mount); + mac_mount_label_associate(vfs_context_kernel(), devfs_hidden_mount); +#endif + + /* Initialize the default IO constraints */ + mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; + mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; + mp->mnt_ioflags = 0; + mp->mnt_realrootvp = NULLVP; + mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL; + + devfs_mount(devfs_hidden_mount, "dummy", NULL, NULL, NULL); + dev_root->de_dnp->dn_dvm + = (struct devfsmount *)devfs_hidden_mount->mnt_data; +#endif /* HIDDEN_MOUNTPOINT */ +#if CONFIG_MACF + mac_devfs_label_associate_directory("/", (int) strlen("/"), + dev_root->de_dnp, "/"); +#endif devfs_ready = 1; - return (0); + return 0; } /***********************************************************************\ @@ -146,264 +236,184 @@ devfs_sinit(void) \***********************************************************************/ -/***************************************************************\ -* Search down the linked list off a dir to find "name" * + +/*************************************************************** +* Search down the linked list off a dir to find "name" * return the devnode_t * for that node. -\***************************************************************/ -/*proto*/ +* +* called with DEVFS_LOCK held +***************************************************************/ devdirent_t * -dev_findname(devnode_t * dir,char *name) +dev_findname(devnode_t * dir, const char *name) { devdirent_t * newfp; - if (dir->dn_type != DEV_DIR) return 0;/*XXX*/ /* printf?*/ - - if (name[0] == '.') - { - if(name[1] == 0) - { + if (dir->dn_type != DEV_DIR) { + return 0; /*XXX*/ /* printf?*/ + } + if (name[0] == '.') { + if (name[1] == 0) { return dir->dn_typeinfo.Dir.myname; } - if((name[1] == '.') && (name[2] == 0)) - { + if ((name[1] == '.') && (name[2] == 0)) { /* for root, .. == . */ return dir->dn_typeinfo.Dir.parent->dn_typeinfo.Dir.myname; } } newfp = dir->dn_typeinfo.Dir.dirlist; - while(newfp) - { - if(!(strcmp(name,newfp->de_name))) + + while (newfp) { + if (!(strncmp(name, newfp->de_name, sizeof(newfp->de_name)))) { return newfp; + } newfp = newfp->de_next; } return NULL; } -#if 0 -/***********************************************************************\ -* Given a starting node (0 for root) and a pathname, return the node * -* for the end item on the path. It MUST BE A DIRECTORY. If the 'CREATE' * -* option is true, then create any missing nodes in the path and create * -* and return the final node as well. * -* This is used to set up a directory, before making nodes in it.. * -* * -* Warning: This function is RECURSIVE. * -\***********************************************************************/ -int -dev_finddir(char * orig_path, /* find this dir (err if not dir) */ - devnode_t * dirnode, /* starting point */ - int create, /* create path? */ - devnode_t * * dn_pp) /* returned */ -{ - devdirent_t * dirent_p; - devnode_t * dnp = NULL; - char pathbuf[DEVMAXPATHSIZE]; - char *path; - char *name; - register char *cp; - int retval; - - - /***************************************\ - * If no parent directory is given * - * then start at the root of the tree * - \***************************************/ - if(!dirnode) dirnode = dev_root->de_dnp; - - /***************************************\ - * Sanity Checks * - \***************************************/ - if (dirnode->dn_type != DEV_DIR) return ENOTDIR; - if(strlen(orig_path) > (DEVMAXPATHSIZE - 1)) return ENAMETOOLONG; - - - path = pathbuf; - strcpy(path,orig_path); - - /***************************************\ - * always absolute, skip leading / * - * get rid of / or // or /// etc. * - \***************************************/ - while(*path == '/') path++; - - /***************************************\ - * If nothing left, then parent was it.. * - \***************************************/ - if ( *path == '\0' ) { - *dn_pp = dirnode; - return 0; - } - - /***************************************\ - * find the next segment of the name * - \***************************************/ - cp = name = path; - while((*cp != '/') && (*cp != 0)) { - cp++; - } - - /***********************************************\ - * Check to see if it's the last component * - \***********************************************/ - if(*cp) { - path = cp + 1; /* path refers to the rest */ - *cp = 0; /* name is now a separate string */ - if(!(*path)) { - path = (char *)0; /* was trailing slash */ - } - } else { - path = NULL; /* no more to do */ - } - - /***************************************\ - * Start scanning along the linked list * - \***************************************/ - dirent_p = dev_findname(dirnode,name); - if(dirent_p) { /* check it's a directory */ - dnp = dirent_p->de_dnp; - if(dnp->dn_type != DEV_DIR) return ENOTDIR; - } else { - /***************************************\ - * The required element does not exist * - * So we will add it if asked to. * - \***************************************/ - if(!create) return ENOENT; - - if((retval = dev_add_entry(name, dirnode, - DEV_DIR, NULL, NULL, NULL, - &dirent_p)) != 0) { - return retval; - } - dnp = dirent_p->de_dnp; - devfs_propogate(dirnode->dn_typeinfo.Dir.myname,dirent_p); - } - if(path != NULL) { /* decide whether to recurse more or return */ - return (dev_finddir(path,dnp,create,dn_pp)); - } else { - *dn_pp = dnp; - return 0; - } -} -#endif 0 -/***********************************************************************\ -* Given a starting node (0 for root) and a pathname, return the node * -* for the end item on the path. It MUST BE A DIRECTORY. If the 'CREATE' * -* option is true, then create any missing nodes in the path and create * -* and return the final node as well. * -* This is used to set up a directory, before making nodes in it.. * -\***********************************************************************/ -/* proto */ -int -dev_finddir(char * path, - devnode_t * dirnode, - int create, - devnode_t * * dn_pp) +/*********************************************************************** +* Given a starting node (0 for root) and a pathname, return the node +* for the end item on the path. It MUST BE A DIRECTORY. If the 'DEVFS_CREATE' +* option is true, then create any missing nodes in the path and create +* and return the final node as well. +* This is used to set up a directory, before making nodes in it.. +* +* called with DEVFS_LOCK held +***********************************************************************/ +static int +dev_finddir(const char * path, + devnode_t * dirnode, + int create, + devnode_t * * dn_pp, + devfs_event_log_t delp) { - devnode_t * dnp = NULL; - int error = 0; - char * scan; + devnode_t * dnp = NULL; + int error = 0; + const char * scan; +#if CONFIG_MACF + char fullpath[DEVMAXPATHSIZE]; +#endif - if (!dirnode) /* dirnode == NULL means start at root */ - dirnode = dev_root->de_dnp; + if (!dirnode) { /* dirnode == NULL means start at root */ + dirnode = dev_root->de_dnp; + } - if (dirnode->dn_type != DEV_DIR) - return ENOTDIR; + if (dirnode->dn_type != DEV_DIR) { + return ENOTDIR; + } - if (strlen(path) > (DEVMAXPATHSIZE - 1)) - return ENAMETOOLONG; + if (strlen(path) > (DEVMAXPATHSIZE - 1)) { + return ENAMETOOLONG; + } +#if CONFIG_MACF + strlcpy(fullpath, path, DEVMAXPATHSIZE); +#endif scan = path; - while (*scan == '/') - scan++; + while (*scan == '/') { + scan++; + } *dn_pp = NULL; while (1) { - char component[DEVMAXPATHSIZE]; - devdirent_t * dirent_p; - char * start; + char component[DEVMAXPATHSIZE]; + devdirent_t * dirent_p; + const char * start; - if (*scan == 0) { - /* we hit the end of the string, we're done */ - *dn_pp = dirnode; - break; - } - start = scan; - while (*scan != '/' && *scan) - scan++; - - strncpy(component, start, scan - start); - if (*scan == '/') - scan++; + if (*scan == 0) { + /* we hit the end of the string, we're done */ + *dn_pp = dirnode; + break; + } + start = scan; + while (*scan != '/' && *scan) { + scan++; + } - dirent_p = dev_findname(dirnode, component); - if (dirent_p) { - dnp = dirent_p->de_dnp; - if (dnp->dn_type != DEV_DIR) { - error = ENOTDIR; - break; + strlcpy(component, start, (scan - start) + 1); + if (*scan == '/') { + scan++; } - } - else { - if (!create) { - error = ENOENT; - break; + + dirent_p = dev_findname(dirnode, component); + if (dirent_p) { + dnp = dirent_p->de_dnp; + if (dnp->dn_type != DEV_DIR) { + error = ENOTDIR; + break; + } + } else { + if (!create) { + error = ENOENT; + break; + } + error = dev_add_entry(component, dirnode, + DEV_DIR, NULL, NULL, NULL, &dirent_p); + if (error) { + break; + } + dnp = dirent_p->de_dnp; +#if CONFIG_MACF + mac_devfs_label_associate_directory( + dirnode->dn_typeinfo.Dir.myname->de_name, + (int) strlen(dirnode->dn_typeinfo.Dir.myname->de_name), + dnp, fullpath); +#endif + devfs_propogate(dirnode->dn_typeinfo.Dir.myname, dirent_p, delp); } - error = dev_add_entry(component, dirnode, - DEV_DIR, NULL, NULL, NULL, &dirent_p); - if (error) - break; - dnp = dirent_p->de_dnp; - devfs_propogate(dirnode->dn_typeinfo.Dir.myname, dirent_p); - } - dirnode = dnp; /* continue relative to this directory */ - } - return (error); + dirnode = dnp; /* continue relative to this directory */ + } + return error; } -/***********************************************************************\ -* Add a new NAME element to the devfs * -* If we're creating a root node, then dirname is NULL * -* Basically this creates a new namespace entry for the device node * -* * -* Creates a name node, and links it to the supplied node * -\***********************************************************************/ -/*proto*/ +/*********************************************************************** +* Add a new NAME element to the devfs +* If we're creating a root node, then dirname is NULL +* Basically this creates a new namespace entry for the device node +* +* Creates a name node, and links it to the supplied node +* +* called with DEVFS_LOCK held +***********************************************************************/ int -dev_add_name(char * name, devnode_t * dirnode, devdirent_t * back, +dev_add_name(const char * name, devnode_t * dirnode, __unused devdirent_t * back, devnode_t * dnp, devdirent_t * *dirent_pp) { - devdirent_t * dirent_p = NULL; + devdirent_t * dirent_p = NULL; - if(dirnode != NULL ) { - if(dirnode->dn_type != DEV_DIR) return(ENOTDIR); - - if( dev_findname(dirnode,name)) - return(EEXIST); + if (dirnode != NULL) { + if (dirnode->dn_type != DEV_DIR) { + return ENOTDIR; + } + + if (dev_findname(dirnode, name)) { + return EEXIST; + } } /* * make sure the name is legal * slightly misleading in the case of NULL */ - if (!name || (strlen(name) > (DEVMAXNAMESIZE - 1))) - return (ENAMETOOLONG); + if (!name || (strlen(name) > (DEVMAXNAMESIZE - 1))) { + return ENAMETOOLONG; + } /* - * Allocate and fill out a new directory entry + * Allocate and fill out a new directory entry */ - MALLOC(dirent_p, devdirent_t *, sizeof(devdirent_t), - M_DEVFSNAME, M_WAITOK); + MALLOC(dirent_p, devdirent_t *, sizeof(devdirent_t), + M_DEVFSNAME, M_WAITOK); if (!dirent_p) { - return ENOMEM; + return ENOMEM; } - bzero(dirent_p,sizeof(devdirent_t)); + bzero(dirent_p, sizeof(devdirent_t)); /* inherrit our parent's mount info */ /*XXX*/ /* a kludge but.... */ - if(dirnode && ( dnp->dn_dvm == NULL)) { + if (dirnode && (dnp->dn_dvm == NULL)) { dnp->dn_dvm = dirnode->dn_dvm; /* if(!dnp->dn_dvm) printf("parent had null dvm "); */ } @@ -414,13 +424,13 @@ dev_add_name(char * name, devnode_t * dirnode, devdirent_t * back, * this stops it from being accidentally freed later. */ dirent_p->de_dnp = dnp; - dnp->dn_links++ ; /* implicit from our own name-node */ + dnp->dn_links++; /* implicit from our own name-node */ - /* + /* * Make sure that we can find all the links that reference a node * so that we can get them all if we need to zap the node. */ - if(dnp->dn_linklist) { + if (dnp->dn_linklist) { dirent_p->de_nextlink = dnp->dn_linklist; dirent_p->de_prevlinkp = dirent_p->de_nextlink->de_prevlinkp; dirent_p->de_nextlink->de_prevlinkp = &(dirent_p->de_nextlink); @@ -432,22 +442,22 @@ dev_add_name(char * name, devnode_t * dirnode, devdirent_t * back, dnp->dn_linklist = dirent_p; /* - * If the node is a directory, then we need to handle the + * If the node is a directory, then we need to handle the * creation of the .. link. * A NULL dirnode indicates a root node, so point to ourself. */ - if(dnp->dn_type == DEV_DIR) { + if (dnp->dn_type == DEV_DIR) { dnp->dn_typeinfo.Dir.myname = dirent_p; /* * If we are unlinking from an old dir, decrement its links * as we point our '..' elsewhere - * Note: it's up to the calling code to remove the + * Note: it's up to the calling code to remove the * us from the original directory's list */ - if(dnp->dn_typeinfo.Dir.parent) { + if (dnp->dn_typeinfo.Dir.parent) { dnp->dn_typeinfo.Dir.parent->dn_links--; } - if(dirnode) { + if (dirnode) { dnp->dn_typeinfo.Dir.parent = dirnode; } else { dnp->dn_typeinfo.Dir.parent = dnp; @@ -458,23 +468,21 @@ dev_add_name(char * name, devnode_t * dirnode, devdirent_t * back, /* * put the name into the directory entry. */ - strcpy(dirent_p->de_name, name); + strlcpy(dirent_p->de_name, name, DEVMAXNAMESIZE); /* * Check if we are not making a root node.. * (i.e. have parent) */ - if(dirnode) { + if (dirnode) { /* - * Put it on the END of the linked list of directory entries - */ - int len; - + * Put it on the END of the linked list of directory entries + */ dirent_p->de_parent = dirnode; /* null for root */ dirent_p->de_prevp = dirnode->dn_typeinfo.Dir.dirlast; - dirent_p->de_next = *(dirent_p->de_prevp); /* should be NULL */ - /*right?*/ + dirent_p->de_next = *(dirent_p->de_prevp); /* should be NULL */ + /*right?*/ *(dirent_p->de_prevp) = dirent_p; dirnode->dn_typeinfo.Dir.dirlast = &(dirent_p->de_next); dirnode->dn_typeinfo.Dir.entrycount++; @@ -483,30 +491,32 @@ dev_add_name(char * name, devnode_t * dirnode, devdirent_t * back, *dirent_pp = dirent_p; DEVFS_INCR_ENTRIES(); - return 0 ; + return 0; } -/***********************************************************************\ -* Add a new element to the devfs plane. * -* * -* Creates a new dev_node to go with it if the prototype should not be * -* reused. (Is a DIR, or we select SPLIT_DEVS at compile time) * -* typeinfo gives us info to make our node if we don't have a prototype. * -* If typeinfo is null and proto exists, then the typeinfo field of * -* the proto is used intead in the CREATE case. * -* note the 'links' count is 0 (except if a dir) * -* but it is only cleared on a transition * -* so this is ok till we link it to something * -* Even in SPLIT_DEVS mode, * -* if the node already exists on the wanted plane, just return it * -\***********************************************************************/ -/*proto*/ +/*********************************************************************** +* Add a new element to the devfs plane. +* +* Creates a new dev_node to go with it if the prototype should not be +* reused. (Is a DIR, or we select SPLIT_DEVS at compile time) +* typeinfo gives us info to make our node if we don't have a prototype. +* If typeinfo is null and proto exists, then the typeinfo field of +* the proto is used intead in the DEVFS_CREATE case. +* note the 'links' count is 0 (except if a dir) +* but it is only cleared on a transition +* so this is ok till we link it to something +* Even in SPLIT_DEVS mode, +* if the node already exists on the wanted plane, just return it +* +* called with DEVFS_LOCK held +***********************************************************************/ int dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, - devnode_t * *dn_pp, struct devfsmount *dvm) + devnode_t * *dn_pp, struct devfsmount *dvm) { - devnode_t * dnp = NULL; + devnode_t * dnp = NULL; + int error = 0; #if defined SPLIT_DEVS /* @@ -515,36 +525,37 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, */ if (proto) { dnp = proto->dn_nextsibling; - while( dnp != proto) { + while (dnp != proto) { if (dnp->dn_dvm == dvm) { *dn_pp = dnp; - return (0); + return 0; } dnp = dnp->dn_nextsibling; } - if (typeinfo == NULL) + if (typeinfo == NULL) { typeinfo = &(proto->dn_typeinfo); + } } -#else /* SPLIT_DEVS */ - if ( proto ) { +#else /* SPLIT_DEVS */ + if (proto) { switch (proto->type) { - case DEV_BDEV: - case DEV_CDEV: - *dn_pp = proto; - return 0; + case DEV_BDEV: + case DEV_CDEV: + *dn_pp = proto; + return 0; } } -#endif /* SPLIT_DEVS */ +#endif /* SPLIT_DEVS */ MALLOC(dnp, devnode_t *, sizeof(devnode_t), M_DEVFSNODE, M_WAITOK); if (!dnp) { - return ENOMEM; + return ENOMEM; } /* * If we have a proto, that means that we are duplicating some * other device, which can only happen if we are not at the back plane */ - if(proto) { + if (proto) { bcopy(proto, dnp, sizeof(devnode_t)); dnp->dn_links = 0; dnp->dn_linklist = NULL; @@ -555,27 +566,39 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, *(dnp->dn_prevsiblingp) = dnp; dnp->dn_nextsibling = proto; proto->dn_prevsiblingp = &(dnp->dn_nextsibling); +#if CONFIG_MACF + mac_devfs_label_init(dnp); + mac_devfs_label_copy(proto->dn_label, dnp->dn_label); +#endif } else { - struct timeval tv; + struct timeval tv; - /* + /* * We have no prototype, so start off with a clean slate */ - tv = time; - bzero(dnp,sizeof(devnode_t)); + microtime(&tv); + bzero(dnp, sizeof(devnode_t)); dnp->dn_type = entrytype; dnp->dn_nextsibling = dnp; dnp->dn_prevsiblingp = &(dnp->dn_nextsibling); dnp->dn_atime.tv_sec = tv.tv_sec; dnp->dn_mtime.tv_sec = tv.tv_sec; dnp->dn_ctime.tv_sec = tv.tv_sec; +#if CONFIG_MACF + mac_devfs_label_init(dnp); +#endif } dnp->dn_dvm = dvm; + /* Note: this inits the reference count to 1, this is considered unreferenced */ + os_ref_init_raw(&dnp->dn_refcount, &devfs_refgrp); + dnp->dn_ino = devfs_unique_fileno; + devfs_unique_fileno++; + /* * fill out the dev node according to type */ - switch(entrytype) { + switch (entrytype) { case DEV_DIR: /* * As it's a directory, make sure @@ -593,31 +616,30 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, * that we use (by default) for directories */ dnp->dn_ops = &devfs_vnodeop_p; - dnp->dn_mode |= 0555; /* default perms */ + dnp->dn_mode |= 0555; /* default perms */ break; case DEV_SLNK: /* * As it's a symlink allocate and store the link info * Symlinks should only ever be created by the user, - * so they are not on the back plane and should not be + * so they are not on the back plane and should not be * propogated forward.. a bit like directories in that way.. * A symlink only exists on one plane and has its own * node.. therefore we might be on any random plane. */ - MALLOC(dnp->dn_typeinfo.Slnk.name, char *, - typeinfo->Slnk.namelen+1, - M_DEVFSNODE, M_WAITOK); + MALLOC(dnp->dn_typeinfo.Slnk.name, char *, + typeinfo->Slnk.namelen + 1, + M_DEVFSNODE, M_WAITOK); if (!dnp->dn_typeinfo.Slnk.name) { - FREE(dnp,M_DEVFSNODE); - return ENOMEM; + error = ENOMEM; + break; } - strncpy(dnp->dn_typeinfo.Slnk.name, typeinfo->Slnk.name, - typeinfo->Slnk.namelen); - dnp->dn_typeinfo.Slnk.name[typeinfo->Slnk.namelen] = '\0'; + strlcpy(dnp->dn_typeinfo.Slnk.name, typeinfo->Slnk.name, + typeinfo->Slnk.namelen + 1); dnp->dn_typeinfo.Slnk.namelen = typeinfo->Slnk.namelen; DEVFS_INCR_STRINGSPACE(dnp->dn_typeinfo.Slnk.namelen + 1); dnp->dn_ops = &devfs_vnodeop_p; - dnp->dn_mode |= 0555; /* default perms */ + dnp->dn_mode |= 0555; /* default perms */ break; case DEV_CDEV: case DEV_BDEV: @@ -628,145 +650,247 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, dnp->dn_ops = &devfs_spec_vnodeop_p; dnp->dn_typeinfo.dev = typeinfo->dev; break; + + #if FDESC + /* /dev/fd is special */ + case DEV_DEVFD: + dnp->dn_ops = &devfs_devfd_vnodeop_p; + dnp->dn_mode |= 0555; /* default perms */ + break; + + #endif /* FDESC */ default: - return EINVAL; + error = EINVAL; + } + + if (error) { + FREE(dnp, M_DEVFSNODE); + } else { + *dn_pp = dnp; + DEVFS_INCR_NODES(); } - *dn_pp = dnp; - DEVFS_INCR_NODES(); - return 0 ; + return error; } -/*proto*/ +/*********************************************************************** + * called with DEVFS_LOCK held + **********************************************************************/ void devnode_free(devnode_t * dnp) { - if (dnp->dn_type == DEV_SLNK) { - DEVFS_DECR_STRINGSPACE(dnp->dn_typeinfo.Slnk.namelen + 1); - FREE(dnp->dn_typeinfo.Slnk.name,M_DEVFSNODE); - } - FREE(dnp, M_DEVFSNODE); - DEVFS_DECR_NODES(); - return; +#if CONFIG_MACF + mac_devfs_label_destroy(dnp); +#endif + if (dnp->dn_type == DEV_SLNK) { + DEVFS_DECR_STRINGSPACE(dnp->dn_typeinfo.Slnk.namelen + 1); + FREE(dnp->dn_typeinfo.Slnk.name, M_DEVFSNODE); + } + DEVFS_DECR_NODES(); + FREE(dnp, M_DEVFSNODE); } -/*proto*/ -void + +/*********************************************************************** + * called with DEVFS_LOCK held + **********************************************************************/ +static void devfs_dn_free(devnode_t * dnp) { - if(--dnp->dn_links <= 0 ) /* can be -1 for initial free, on error */ - { + if (--dnp->dn_links <= 0) { /* can be -1 for initial free, on error */ /*probably need to do other cleanups XXX */ if (dnp->dn_nextsibling != dnp) { - devnode_t * * prevp = dnp->dn_prevsiblingp; + devnode_t * * prevp = dnp->dn_prevsiblingp; *prevp = dnp->dn_nextsibling; dnp->dn_nextsibling->dn_prevsiblingp = prevp; - - } - if (dnp->dn_vn == NULL) { -#if 0 - printf("devfs_dn_free: free'ing %x\n", (unsigned int)dnp); -#endif 0 - devnode_free(dnp); /* no accesses/references */ } - else { -#if 0 - printf("devfs_dn_free: marking %x for deletion\n", - (unsigned int)dnp); -#endif 0 - dnp->dn_delete = TRUE; + + /* Can only free if there are no references; otherwise, wait for last vnode to be reclaimed */ + os_ref_count_t rc = os_ref_get_count_raw(&dnp->dn_refcount); + if (rc == 1) { + /* release final reference from dev_add_node */ + (void) os_ref_release_locked_raw(&dnp->dn_refcount, &devfs_refgrp); + devnode_free(dnp); + } else { + dnp->dn_lflags |= DN_DELETE; } } } /***********************************************************************\ -* Front Node Operations * +* Front Node Operations * * Add or delete a chain of front nodes * \***********************************************************************/ -/***********************************************************************\ -* Given a directory backing node, and a child backing node, add the * -* appropriate front nodes to the front nodes of the directory to * -* represent the child node to the user * -* * -* on failure, front nodes will either be correct or not exist for each * -* front dir, however dirs completed will not be stripped of completed * -* frontnodes on failure of a later frontnode * -* * -* This allows a new node to be propogated through all mounted planes * -* * -\***********************************************************************/ -/*proto*/ -int -devfs_propogate(devdirent_t * parent,devdirent_t * child) + +/*********************************************************************** +* Given a directory backing node, and a child backing node, add the +* appropriate front nodes to the front nodes of the directory to +* represent the child node to the user +* +* on failure, front nodes will either be correct or not exist for each +* front dir, however dirs completed will not be stripped of completed +* frontnodes on failure of a later frontnode +* +* This allows a new node to be propogated through all mounted planes +* +* called with DEVFS_LOCK held +***********************************************************************/ +static int +devfs_propogate(devdirent_t * parent, devdirent_t * child, devfs_event_log_t delp) { - int error; + int error; devdirent_t * newnmp; - devnode_t * dnp = child->de_dnp; - devnode_t * pdnp = parent->de_dnp; - devnode_t * adnp = parent->de_dnp; + devnode_t * dnp = child->de_dnp; + devnode_t * pdnp = parent->de_dnp; + devnode_t * adnp = parent->de_dnp; int type = child->de_dnp->dn_type; + uint32_t events; + + events = (dnp->dn_type == DEV_DIR ? VNODE_EVENT_DIR_CREATED : VNODE_EVENT_FILE_CREATED); + if (delp != NULL) { + devfs_record_event(delp, pdnp, events); + } - /***********************************************\ - * Find the other instances of the parent node * - \***********************************************/ + /*********************************************** + * Find the other instances of the parent node + ***********************************************/ for (adnp = pdnp->dn_nextsibling; - adnp != pdnp; - adnp = adnp->dn_nextsibling) - { + adnp != pdnp; + adnp = adnp->dn_nextsibling) { /* * Make the node, using the original as a prototype) * if the node already exists on that plane it won't be * re-made.. */ if ((error = dev_add_entry(child->de_name, adnp, type, - NULL, dnp, adnp->dn_dvm, - &newnmp)) != 0) { - printf("duplicating %s failed\n",child->de_name); + NULL, dnp, adnp->dn_dvm, + &newnmp)) != 0) { + printf("duplicating %s failed\n", child->de_name); + } else { + if (delp != NULL) { + devfs_record_event(delp, adnp, events); + + /* + * Slightly subtle. We're guaranteed that there will + * only be a vnode hooked into this devnode if we're creating + * a new link to an existing node; otherwise, the devnode is new + * and no one can have looked it up yet. If we're making a link, + * then the buffer is large enough for two nodes in each + * plane; otherwise, there's no vnode and this call will + * do nothing. + */ + devfs_record_event(delp, newnmp->de_dnp, VNODE_EVENT_LINK); + } } } - return 0; /* for now always succeed */ + return 0; /* for now always succeed */ +} + +static uint32_t +remove_notify_count(devnode_t *dnp) +{ + uint32_t notify_count = 0; + devnode_t *dnp2; + + /* + * Could need to notify for one removed node on each mount and + * one parent for each such node. + */ + notify_count = devfs_nmountplanes; + notify_count += dnp->dn_links; + for (dnp2 = dnp->dn_nextsibling; dnp2 != dnp; dnp2 = dnp2->dn_nextsibling) { + notify_count += dnp2->dn_links; + } + + return notify_count; } /*********************************************************************** - * remove all instances of this devicename [for backing nodes..] - * note.. if there is another link to the node (non dir nodes only) - * then the devfs_node will still exist as the ref count will be non-0 - * removing a directory node will remove all sup-nodes on all planes (ZAP) - * - * Used by device drivers to remove nodes that are no longer relevant - * The argument is the 'cookie' they were given when they created the node - * this function is exported.. see devfs.h - ***********************************************************************/ +* remove all instances of this devicename [for backing nodes..] +* note.. if there is another link to the node (non dir nodes only) +* then the devfs_node will still exist as the ref count will be non-0 +* removing a directory node will remove all sup-nodes on all planes (ZAP) +* +* Used by device drivers to remove nodes that are no longer relevant +* The argument is the 'cookie' they were given when they created the node +* this function is exported.. see devfs.h +***********************************************************************/ void devfs_remove(void *dirent_p) { devnode_t * dnp = ((devdirent_t *)dirent_p)->de_dnp; devnode_t * dnp2; - boolean_t funnel_state; + boolean_t lastlink; + struct devfs_event_log event_log; + uint32_t log_count = 0; + int do_notify = 0; + int need_free = 0; + struct devfs_vnode_event stackbuf[NUM_STACK_ENTRIES]; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + DEVFS_LOCK(); if (!devfs_ready) { printf("devfs_remove: not ready for devices!\n"); goto out; } - DEVFS_LOCK(0); + log_count = remove_notify_count(dnp); - /* keep removing the next sibling till only we exist. */ - while((dnp2 = dnp->dn_nextsibling) != dnp) { + if (log_count > NUM_STACK_ENTRIES) { + uint32_t new_count; +wrongsize: + DEVFS_UNLOCK(); + if (devfs_init_event_log(&event_log, log_count, NULL) == 0) { + do_notify = 1; + need_free = 1; + } + DEVFS_LOCK(); + + new_count = remove_notify_count(dnp); + if (need_free && (new_count > log_count)) { + devfs_release_event_log(&event_log, 1); + need_free = 0; + do_notify = 0; + log_count = log_count * 2; + goto wrongsize; + } + } else { + if (devfs_init_event_log(&event_log, NUM_STACK_ENTRIES, &stackbuf[0]) == 0) { + do_notify = 1; + } + } + /* This file has been deleted */ + if (do_notify != 0) { + devfs_record_event(&event_log, dnp, VNODE_EVENT_DELETE); + } + + /* keep removing the next sibling till only we exist. */ + while ((dnp2 = dnp->dn_nextsibling) != dnp) { /* * Keep removing the next front node till no more exist */ - dnp->dn_nextsibling = dnp2->dn_nextsibling; + dnp->dn_nextsibling = dnp2->dn_nextsibling; dnp->dn_nextsibling->dn_prevsiblingp = &(dnp->dn_nextsibling); dnp2->dn_nextsibling = dnp2; dnp2->dn_prevsiblingp = &(dnp2->dn_nextsibling); - while(dnp2->dn_linklist) { - dev_free_name(dnp2->dn_linklist); + + /* This file has been deleted in this plane */ + if (do_notify != 0) { + devfs_record_event(&event_log, dnp2, VNODE_EVENT_DELETE); + } + + if (dnp2->dn_linklist) { + do { + lastlink = (1 == dnp2->dn_links); + /* Each parent of a link to this file has lost a child in this plane */ + if (do_notify != 0) { + devfs_record_event(&event_log, dnp2->dn_linklist->de_parent, VNODE_EVENT_FILE_REMOVED); + } + dev_free_name(dnp2->dn_linklist); + } while (!lastlink); } } @@ -775,89 +899,116 @@ devfs_remove(void *dirent_p) * If we are not running in SPLIT_DEVS mode, then * THIS is what gets rid of the propogated nodes. */ - while(dnp->dn_linklist) { - dev_free_name(dnp->dn_linklist); + if (dnp->dn_linklist) { + do { + lastlink = (1 == dnp->dn_links); + /* Each parent of a link to this file has lost a child */ + if (do_notify != 0) { + devfs_record_event(&event_log, dnp->dn_linklist->de_parent, VNODE_EVENT_FILE_REMOVED); + } + dev_free_name(dnp->dn_linklist); + } while (!lastlink); } - DEVFS_UNLOCK(0); out: - (void) thread_funnel_set(kernel_flock, funnel_state); - return ; + DEVFS_UNLOCK(); + if (do_notify != 0) { + devfs_bulk_notify(&event_log); + devfs_release_event_log(&event_log, need_free); + } + + return; } + /*************************************************************** * duplicate the backing tree into a tree of nodes hung off the * mount point given as the argument. Do this by * calling dev_dup_entry which recurses all the way * up the tree.. + * + * called with DEVFS_LOCK held **************************************************************/ -/*proto*/ int dev_dup_plane(struct devfsmount *devfs_mp_p) { - devdirent_t * new; - int error = 0; + devdirent_t * new; + int error = 0; - if ((error = dev_dup_entry(NULL, dev_root, &new, devfs_mp_p))) - return error; + if ((error = dev_dup_entry(NULL, dev_root, &new, devfs_mp_p))) { + return error; + } devfs_mp_p->plane_root = new; + devfs_nmountplanes++; return error; } -/***************************************************************\ +/*************************************************************** * Free a whole plane -\***************************************************************/ -/*proto*/ +* +* called with DEVFS_LOCK held +***************************************************************/ void devfs_free_plane(struct devfsmount *devfs_mp_p) { devdirent_t * dirent_p; dirent_p = devfs_mp_p->plane_root; - if(dirent_p) { + if (dirent_p) { dev_free_hier(dirent_p); dev_free_name(dirent_p); } devfs_mp_p->plane_root = NULL; + devfs_nmountplanes--; + + if (devfs_nmountplanes > (devfs_nmountplanes + 1)) { + panic("plane count wrapped around.\n"); + } } -/***************************************************************\ -* Create and link in a new front element.. * -* Parent can be 0 for a root node * -* Not presently usable to make a symlink XXX * + +/*************************************************************** +* Create and link in a new front element.. +* Parent can be 0 for a root node +* Not presently usable to make a symlink XXX * (Ok, symlinks don't propogate) -* recursively will create subnodes corresponding to equivalent * -* child nodes in the base level * -\***************************************************************/ -/*proto*/ -int +* recursively will create subnodes corresponding to equivalent +* child nodes in the base level +* +* called with DEVFS_LOCK held +***************************************************************/ +static int dev_dup_entry(devnode_t * parent, devdirent_t * back, devdirent_t * *dnm_pp, - struct devfsmount *dvm) + struct devfsmount *dvm) { - devdirent_t * entry_p; - devdirent_t * newback; - devdirent_t * newfront; - int error; - devnode_t * dnp = back->de_dnp; + devdirent_t * entry_p = NULL; + devdirent_t * newback; + devdirent_t * newfront; + int error; + devnode_t * dnp = back->de_dnp; int type = dnp->dn_type; /* * go get the node made (if we need to) * use the back one as a prototype */ - if ((error = dev_add_entry(back->de_name, parent, type, - NULL, dnp, - parent?parent->dn_dvm:dvm, &entry_p)) != 0) { - printf("duplicating %s failed\n",back->de_name); + error = dev_add_entry(back->de_name, parent, type, NULL, dnp, + parent?parent->dn_dvm:dvm, &entry_p); + if (!error && (entry_p == NULL)) { + error = ENOMEM; /* Really can't happen, but make static analyzer happy */ + } + if (error != 0) { + printf("duplicating %s failed\n", back->de_name); + goto out; } /* * If we have just made the root, then insert the pointer to the * mount information */ - if(dvm) { + if (dvm) { entry_p->de_dnp->dn_dvm = dvm; } @@ -866,106 +1017,104 @@ dev_dup_entry(devnode_t * parent, devdirent_t * back, devdirent_t * *dnm_pp, * subnodes in it.... * note that this time we don't pass on the mount info.. */ - if (type == DEV_DIR) - { - for(newback = back->de_dnp->dn_typeinfo.Dir.dirlist; - newback; newback = newback->de_next) - { - if((error = dev_dup_entry(entry_p->de_dnp, - newback, &newfront, NULL)) != 0) - { + if (type == DEV_DIR) { + for (newback = back->de_dnp->dn_typeinfo.Dir.dirlist; + newback; newback = newback->de_next) { + if ((error = dev_dup_entry(entry_p->de_dnp, + newback, &newfront, NULL)) != 0) { break; /* back out with an error */ } } } +out: *dnm_pp = entry_p; return error; } -/***************************************************************\ -* Free a name node * -* remember that if there are other names pointing to the * -* dev_node then it may not get freed yet * -* can handle if there is no dnp * -\***************************************************************/ -/*proto*/ + +/*************************************************************** +* Free a name node +* remember that if there are other names pointing to the +* dev_node then it may not get freed yet +* can handle if there is no dnp +* +* called with DEVFS_LOCK held +***************************************************************/ + int dev_free_name(devdirent_t * dirent_p) { - devnode_t * parent = dirent_p->de_parent; - devnode_t * dnp = dirent_p->de_dnp; + devnode_t * parent = dirent_p->de_parent; + devnode_t * dnp = dirent_p->de_dnp; - if(dnp) { - if(dnp->dn_type == DEV_DIR) - { - devnode_t * p; + if (dnp) { + if (dnp->dn_type == DEV_DIR) { + devnode_t * p; - if(dnp->dn_typeinfo.Dir.dirlist) - return (ENOTEMPTY); + if (dnp->dn_typeinfo.Dir.dirlist) { + return ENOTEMPTY; + } p = dnp->dn_typeinfo.Dir.parent; - devfs_dn_free(dnp); /* account for '.' */ - devfs_dn_free(p); /* '..' */ + devfs_dn_free(dnp); /* account for '.' */ + devfs_dn_free(p); /* '..' */ } /* * unlink us from the list of links for this node * If we are the only link, it's easy! * if we are a DIR of course there should not be any * other links. - */ - if(dirent_p->de_nextlink == dirent_p) { - dnp->dn_linklist = NULL; + */ + if (dirent_p->de_nextlink == dirent_p) { + dnp->dn_linklist = NULL; } else { - if(dnp->dn_linklist == dirent_p) { + if (dnp->dn_linklist == dirent_p) { dnp->dn_linklist = dirent_p->de_nextlink; } - dirent_p->de_nextlink->de_prevlinkp - = dirent_p->de_prevlinkp; - *dirent_p->de_prevlinkp = dirent_p->de_nextlink; } devfs_dn_free(dnp); } + dirent_p->de_nextlink->de_prevlinkp = dirent_p->de_prevlinkp; + *(dirent_p->de_prevlinkp) = dirent_p->de_nextlink; + /* * unlink ourselves from the directory on this plane */ - if(parent) /* if not fs root */ - { - if( (*dirent_p->de_prevp = dirent_p->de_next) )/* yes, assign */ - { + if (parent) { /* if not fs root */ + if ((*dirent_p->de_prevp = dirent_p->de_next)) {/* yes, assign */ dirent_p->de_next->de_prevp = dirent_p->de_prevp; - } - else - { + } else { parent->dn_typeinfo.Dir.dirlast - = dirent_p->de_prevp; + = dirent_p->de_prevp; } parent->dn_typeinfo.Dir.entrycount--; parent->dn_len -= strlen(dirent_p->de_name) + 8; } DEVFS_DECR_ENTRIES(); - FREE(dirent_p,M_DEVFSNAME); + FREE(dirent_p, M_DEVFSNAME); return 0; } -/***************************************************************\ -* Free a hierarchy starting at a directory node name * -* remember that if there are other names pointing to the * -* dev_node then it may not get freed yet * -* can handle if there is no dnp * -* leave the node itself allocated. * -\***************************************************************/ -/*proto*/ -void + +/*************************************************************** +* Free a hierarchy starting at a directory node name +* remember that if there are other names pointing to the +* dev_node then it may not get freed yet +* can handle if there is no dnp +* leave the node itself allocated. +* +* called with DEVFS_LOCK held +***************************************************************/ + +static void dev_free_hier(devdirent_t * dirent_p) { - devnode_t * dnp = dirent_p->de_dnp; + devnode_t * dnp = dirent_p->de_dnp; - if(dnp) { - if(dnp->dn_type == DEV_DIR) - { - while(dnp->dn_typeinfo.Dir.dirlist) - { + if (dnp) { + if (dnp->dn_type == DEV_DIR) { + while (dnp->dn_typeinfo.Dir.dirlist) { dev_free_hier(dnp->dn_typeinfo.Dir.dirlist); dev_free_name(dnp->dn_typeinfo.Dir.dirlist); } @@ -973,84 +1122,369 @@ dev_free_hier(devdirent_t * dirent_p) } } -/***************************************************************\ -* given a dev_node, find the appropriate vnode if one is already -* associated, or get a new one and associate it with the dev_node -\***************************************************************/ -/*proto*/ + +/*************************************************************** + * given a dev_node, find the appropriate vnode if one is already + * associated, or get a new one and associate it with the dev_node + * + * called with DEVFS_LOCK held + * + * If an error is returned, then the dnp may have been freed (we + * raced with a delete and lost). A devnode should not be accessed + * after devfs_dntovn() fails. + ****************************************************************/ int -devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, struct proc * p) +devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, __unused struct proc * p) { - struct vnode *vn_p, *nvp; + struct vnode *vn_p; int error = 0; + struct vnode_fsparam vfsp; + enum vtype vtype = 0; + int markroot = 0; + int nretries = 0; + int n_minor = DEVFS_CLONE_ALLOC; /* new minor number for clone device */ + /* + * We should never come in and find that our devnode has been marked for delete. + * The lookup should have held the lock from entry until now; it should not have + * been able to find a removed entry. Any other pathway would have just created + * the devnode and come here without dropping the devfs lock, so no one would + * have a chance to delete. + */ + if (dnp->dn_lflags & DN_DELETE) { + panic("devfs_dntovn: DN_DELETE set on a devnode upon entry."); + } + + devfs_ref_node(dnp); + +retry: *vn_pp = NULL; vn_p = dnp->dn_vn; + if (vn_p) { /* already has a vnode */ - *vn_pp = vn_p; - return(vget(vn_p, LK_EXCLUSIVE, p)); - } - if (!(error = getnewvnode(VT_DEVFS, dnp->dn_dvm->mount, - *(dnp->dn_ops), &vn_p))) { - switch(dnp->dn_type) { - case DEV_SLNK: - vn_p->v_type = VLNK; - break; - case DEV_DIR: - if (dnp->dn_typeinfo.Dir.parent == dnp) { - vn_p->v_flag |= VROOT; + uint32_t vid; + + vid = vnode_vid(vn_p); + + DEVFS_UNLOCK(); + + /* + * We want to use the drainok variant of vnode_getwithvid + * because we _don't_ want to get an iocount if the vnode is + * is blocked in vnode_drain as it can cause infinite + * loops in vn_open_auth. While in use vnodes are typically + * only reclaimed on forced unmounts, In use devfs tty vnodes + * can be quite frequently reclaimed by revoke(2) or by the + * exit of a controlling process. + */ + error = vnode_getwithvid_drainok(vn_p, vid); + + DEVFS_LOCK(); + + if (dnp->dn_lflags & DN_DELETE) { + /* + * our BUSY node got marked for + * deletion while the DEVFS lock + * was dropped... + */ + if (error == 0) { + /* + * vnode_getwithvid returned a valid ref + * which we need to drop + */ + vnode_put(vn_p); } - vn_p->v_type = VDIR; - break; - case DEV_BDEV: - case DEV_CDEV: - vn_p->v_type - = (dnp->dn_type == DEV_BDEV) ? VBLK : VCHR; - if ((nvp = checkalias(vn_p, dnp->dn_typeinfo.dev, - dnp->dn_dvm->mount)) != NULL) { - vput(vn_p); - vn_p = nvp; + + /* + * This entry is no longer in the namespace. This is only + * possible for lookup: no other path would not find an existing + * vnode. Therefore, ENOENT is a valid result. + */ + error = ENOENT; + } else if (error == ENODEV) { + /* + * The Filesystem is getting unmounted. + */ + error = ENOENT; + } else if (error && (nretries < DEV_MAX_VNODE_RETRY)) { + /* + * If we got an error from vnode_getwithvid, it means + * we raced with a recycle and lost i.e. we asked for + * an iocount only after vnode_drain had been entered + * for the vnode and returned with an error only after + * devfs_reclaim was called on the vnode. devfs_reclaim + * sets dn_vn to NULL but while we were waiting to + * reacquire DEVFS_LOCK, another vnode might have gotten + * associated with the dnp. In either case, we need to + * retry otherwise we will end up returning an ENOENT + * for this lookup but the next lookup will succeed + * because it creates a new vnode (or a racing lookup + * created a new vnode already). + */ + error = 0; + nretries++; + goto retry; + } + if (!error) { + *vn_pp = vn_p; + } + + goto out; + } + + /* + * If we get here, then we've beaten any deletes; + * if someone sets DN_DELETE during a subsequent drop + * of the devfs lock, we'll still vend a vnode. + */ + + if (dnp->dn_lflags & DN_CREATE) { + dnp->dn_lflags |= DN_CREATEWAIT; + msleep(&dnp->dn_lflags, &devfs_mutex, PRIBIO, 0, 0); + goto retry; + } + + dnp->dn_lflags |= DN_CREATE; + + switch (dnp->dn_type) { + case DEV_SLNK: + vtype = VLNK; + break; + case DEV_DIR: + if (dnp->dn_typeinfo.Dir.parent == dnp) { + markroot = 1; + } + vtype = VDIR; + break; + case DEV_BDEV: + case DEV_CDEV: + vtype = (dnp->dn_type == DEV_BDEV) ? VBLK : VCHR; + break; +#if FDESC + case DEV_DEVFD: + vtype = VDIR; + break; +#endif /* FDESC */ + } + vfsp.vnfs_mp = dnp->dn_dvm->mount; + vfsp.vnfs_vtype = vtype; + vfsp.vnfs_str = "devfs"; + vfsp.vnfs_dvp = 0; + vfsp.vnfs_fsnode = dnp; + vfsp.vnfs_cnp = 0; + vfsp.vnfs_vops = *(dnp->dn_ops); + + if (vtype == VBLK || vtype == VCHR) { + /* + * Ask the clone minor number function for a new minor number + * to use for the next device instance. If an administative + * limit has been reached, this function will return -1. + */ + if (dnp->dn_clone != NULL) { + int n_major = major(dnp->dn_typeinfo.dev); + + n_minor = (*dnp->dn_clone)(dnp->dn_typeinfo.dev, DEVFS_CLONE_ALLOC); + if (n_minor == -1) { + error = ENOMEM; + goto out; } - break; + + vfsp.vnfs_rdev = makedev(n_major, n_minor);; + } else { + vfsp.vnfs_rdev = dnp->dn_typeinfo.dev; + } + } else { + vfsp.vnfs_rdev = 0; + } + vfsp.vnfs_filesize = 0; + vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE; + /* Tag system files */ + vfsp.vnfs_marksystem = 0; + vfsp.vnfs_markroot = markroot; + + DEVFS_UNLOCK(); + + error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vn_p); + + /* Do this before grabbing the lock */ + if (error == 0) { + vnode_setneedinactive(vn_p); + } + + DEVFS_LOCK(); + + if (error == 0) { + vnode_settag(vn_p, VT_DEVFS); + + if ((dnp->dn_clone != NULL) && (dnp->dn_vn != NULLVP)) { + panic("devfs_dntovn: cloning device with a vnode?\n"); } - vn_p->v_mount = dnp->dn_dvm->mount;/* XXX Duplicated */ + *vn_pp = vn_p; - vn_p->v_data = (void *)dnp; - dnp->dn_vn = vn_p; - error = vn_lock(vn_p, LK_EXCLUSIVE | LK_RETRY, p); + + /* + * Another vnode that has this devnode as its v_data. + * This reference, unlike the one taken at the start + * of the function, persists until a VNOP_RECLAIM + * comes through for this vnode. + */ + devfs_ref_node(dnp); + + /* + * A cloned vnode is not hooked into the devnode; every lookup + * gets a new vnode. + */ + if (dnp->dn_clone == NULL) { + dnp->dn_vn = vn_p; + } + } else if (n_minor != DEVFS_CLONE_ALLOC) { + /* + * If we failed the create, we need to release the cloned minor + * back to the free list. In general, this is only useful if + * the clone function results in a state change in the cloned + * device for which the minor number was obtained. If we get + * past this point withouth falling into this case, it's + * assumed that any state to be released will be released when + * the vnode is dropped, instead. + */ + (void)(*dnp->dn_clone)(dnp->dn_typeinfo.dev, DEVFS_CLONE_FREE); + } + + dnp->dn_lflags &= ~DN_CREATE; + if (dnp->dn_lflags & DN_CREATEWAIT) { + dnp->dn_lflags &= ~DN_CREATEWAIT; + wakeup(&dnp->dn_lflags); } + +out: + /* + * Release the reference we took to prevent deletion while we weren't holding the lock. + * If not returning success, then dropping this reference could delete the devnode; + * no one should access a devnode after a call to devfs_dntovn fails. + */ + devfs_rele_node(dnp); + return error; } -/***********************************************************************\ -* add a whole device, with no prototype.. make name element and node * -* Used for adding the original device entries * -\***********************************************************************/ -/*proto*/ +/* + * Increment refcount on a devnode; prevents free of the node + * while the devfs lock is not held. + */ +void +devfs_ref_node(devnode_t *dnp) +{ + os_ref_retain_locked_raw(&dnp->dn_refcount, &devfs_refgrp); +} + +/* + * Release a reference on a devnode. If the devnode is marked for + * free and the refcount is dropped to one, do the free. + */ +void +devfs_rele_node(devnode_t *dnp) +{ + os_ref_count_t rc = os_ref_release_locked_raw(&dnp->dn_refcount, &devfs_refgrp); + if (rc < 1) { + panic("devfs_rele_node: devnode without a refcount!\n"); + } else if ((rc == 1) && (dnp->dn_lflags & DN_DELETE)) { + /* release final reference from dev_add_node */ + (void) os_ref_release_locked_raw(&dnp->dn_refcount, &devfs_refgrp); + devnode_free(dnp); + } +} + +/*********************************************************************** +* add a whole device, with no prototype.. make name element and node +* Used for adding the original device entries +* +* called with DEVFS_LOCK held +***********************************************************************/ int -dev_add_entry(char *name, devnode_t * parent, int type, devnode_type_t * typeinfo, - devnode_t * proto, struct devfsmount *dvm, devdirent_t * *nm_pp) +dev_add_entry(const char *name, devnode_t * parent, int type, devnode_type_t * typeinfo, + devnode_t * proto, struct devfsmount *dvm, devdirent_t * *nm_pp) { - devnode_t * dnp; - int error = 0; + devnode_t * dnp; + int error = 0; - if ((error = dev_add_node(type, typeinfo, proto, &dnp, - (parent?parent->dn_dvm:dvm))) != 0) - { + if ((error = dev_add_node(type, typeinfo, proto, &dnp, + (parent?parent->dn_dvm:dvm))) != 0) { printf("devfs: %s: base node allocation failed (Errno=%d)\n", - name,error); + name, error); return error; } - if ((error = dev_add_name(name ,parent ,NULL, dnp, nm_pp)) != 0) - { + if ((error = dev_add_name(name, parent, NULL, dnp, nm_pp)) != 0) { devfs_dn_free(dnp); /* 1->0 for dir, 0->(-1) for other */ printf("devfs: %s: name slot allocation failed (Errno=%d)\n", - name,error); - + name, error); } return error; } +static void +devfs_bulk_notify(devfs_event_log_t delp) +{ + uint32_t i; + for (i = 0; i < delp->del_used; i++) { + devfs_vnode_event_t dvep = &delp->del_entries[i]; + if (vnode_getwithvid(dvep->dve_vp, dvep->dve_vid) == 0) { + vnode_notify(dvep->dve_vp, dvep->dve_events, NULL); + vnode_put(dvep->dve_vp); + } + } +} + +static void +devfs_record_event(devfs_event_log_t delp, devnode_t *dnp, uint32_t events) +{ + if (delp->del_used >= delp->del_max) { + panic("devfs event log overflowed.\n"); + } + + /* Can only notify for nodes that have an associated vnode */ + if (dnp->dn_vn != NULLVP && vnode_ismonitored(dnp->dn_vn)) { + devfs_vnode_event_t dvep = &delp->del_entries[delp->del_used]; + dvep->dve_vp = dnp->dn_vn; + dvep->dve_vid = vnode_vid(dnp->dn_vn); + dvep->dve_events = events; + delp->del_used++; + } +} + +static int +devfs_init_event_log(devfs_event_log_t delp, uint32_t count, devfs_vnode_event_t buf) +{ + devfs_vnode_event_t dvearr; + + if (buf == NULL) { + MALLOC(dvearr, devfs_vnode_event_t, count * sizeof(struct devfs_vnode_event), M_TEMP, M_WAITOK | M_ZERO); + if (dvearr == NULL) { + return ENOMEM; + } + } else { + dvearr = buf; + } + + delp->del_max = count; + delp->del_used = 0; + delp->del_entries = dvearr; + return 0; +} + +static void +devfs_release_event_log(devfs_event_log_t delp, int need_free) +{ + if (delp->del_entries == NULL) { + panic("Free of devfs notify info that has not been intialized.\n"); + } + + if (need_free) { + FREE(delp->del_entries, M_TEMP); + } + + delp->del_entries = NULL; +} + /* * Function: devfs_make_node * @@ -1058,49 +1492,114 @@ dev_add_entry(char *name, devnode_t * parent, int type, devnode_type_t * typeinf * Create a device node with the given pathname in the devfs namespace. * * Parameters: - * dev - the dev_t value to associate + * dev - the dev_t value to associate * chrblk - block or character device (DEVFS_CHAR or DEVFS_BLOCK) * uid, gid - ownership * perms - permissions + * clone - minor number cloning function * fmt, ... - path format string with printf args to format the path name * Returns: * A handle to a device node if successful, NULL otherwise. */ void * -devfs_make_node(dev_t dev, int chrblk, uid_t uid, - gid_t gid, int perms, char *fmt, ...) +devfs_make_node_clone(dev_t dev, int chrblk, uid_t uid, + gid_t gid, int perms, int (*clone)(dev_t dev, int action), + const char *fmt, ...) { - devdirent_t * new_dev = NULL; - devnode_t * dnp; /* devnode for parent directory */ - devnode_type_t typeinfo; - - char *name, *path, buf[256]; /* XXX */ - boolean_t funnel_state; - int i; + devdirent_t * new_dev = NULL; + devfstype_t type; va_list ap; - funnel_state = thread_funnel_set(kernel_flock, TRUE); - - if (!devfs_ready) { - printf("devfs_make_node: not ready for devices!\n"); + switch (chrblk) { + case DEVFS_CHAR: + type = DEV_CDEV; + break; + case DEVFS_BLOCK: + type = DEV_BDEV; + break; + default: goto out; } - if (chrblk != DEVFS_CHAR && chrblk != DEVFS_BLOCK) + va_start(ap, fmt); + new_dev = devfs_make_node_internal(dev, type, uid, gid, perms, clone, fmt, ap); + va_end(ap); +out: + return new_dev; +} + + +/* + * Function: devfs_make_node + * + * Purpose + * Create a device node with the given pathname in the devfs namespace. + * + * Parameters: + * dev - the dev_t value to associate + * chrblk - block or character device (DEVFS_CHAR or DEVFS_BLOCK) + * uid, gid - ownership + * perms - permissions + * fmt, ... - path format string with printf args to format the path name + * Returns: + * A handle to a device node if successful, NULL otherwise. + */ +void * +devfs_make_node(dev_t dev, int chrblk, uid_t uid, + gid_t gid, int perms, const char *fmt, ...) +{ + devdirent_t * new_dev = NULL; + devfstype_t type; + va_list ap; + + if (chrblk != DEVFS_CHAR && chrblk != DEVFS_BLOCK) { goto out; + } + + type = (chrblk == DEVFS_BLOCK ? DEV_BDEV : DEV_CDEV); va_start(ap, fmt); - vsnprintf(buf, sizeof(buf), fmt, ap); + new_dev = devfs_make_node_internal(dev, type, uid, gid, perms, NULL, fmt, ap); va_end(ap); +out: + return new_dev; +} + +static devdirent_t * +devfs_make_node_internal(dev_t dev, devfstype_t type, uid_t uid, + gid_t gid, int perms, int (*clone)(dev_t dev, int action), const char *fmt, va_list ap) +{ + devdirent_t * new_dev = NULL; + devnode_t * dnp; + devnode_type_t typeinfo; + + char *name, buf[256]; /* XXX */ + const char *path; +#if CONFIG_MACF + char buff[sizeof(buf)]; +#endif + size_t i; + uint32_t log_count; + struct devfs_event_log event_log; + struct devfs_vnode_event stackbuf[NUM_STACK_ENTRIES]; + int need_free = 0; + + vsnprintf(buf, sizeof(buf), fmt, ap); + +#if CONFIG_MACF + bcopy(buf, buff, sizeof(buff)); + buff[sizeof(buff) - 1] = 0; +#endif name = NULL; - for(i=strlen(buf); i>0; i--) - if(buf[i] == '/') { - name=&buf[i]; - buf[i]=0; + for (i = strlen(buf); i > 0; i--) { + if (buf[i] == '/') { + name = &buf[i]; + buf[i] = 0; break; } + } if (name) { *name++ = '\0'; @@ -1110,23 +1609,54 @@ devfs_make_node(dev_t dev, int chrblk, uid_t uid, path = "/"; } - DEVFS_LOCK(0); + log_count = devfs_nmountplanes; + if (log_count > NUM_STACK_ENTRIES) { +wrongsize: + need_free = 1; + if (devfs_init_event_log(&event_log, log_count, NULL) != 0) { + return NULL; + } + } else { + need_free = 0; + log_count = NUM_STACK_ENTRIES; + if (devfs_init_event_log(&event_log, log_count, &stackbuf[0]) != 0) { + return NULL; + } + } + + DEVFS_LOCK(); + if (log_count < devfs_nmountplanes) { + DEVFS_UNLOCK(); + devfs_release_event_log(&event_log, need_free); + log_count = log_count * 2; + goto wrongsize; + } + + if (!devfs_ready) { + printf("devfs_make_node: not ready for devices!\n"); + goto out; + } + /* find/create directory path ie. mkdir -p */ - if (dev_finddir(path, NULL, CREATE, &dnp) == 0) { - typeinfo.dev = dev; - if (dev_add_entry(name, dnp, - (chrblk == DEVFS_CHAR) ? DEV_CDEV : DEV_BDEV, - &typeinfo, NULL, NULL, &new_dev) == 0) { - new_dev->de_dnp->dn_gid = gid; - new_dev->de_dnp->dn_uid = uid; - new_dev->de_dnp->dn_mode |= perms; - devfs_propogate(dnp->dn_typeinfo.Dir.myname, new_dev); - } - } - DEVFS_UNLOCK(0); + if (dev_finddir(path, NULL, DEVFS_CREATE, &dnp, &event_log) == 0) { + typeinfo.dev = dev; + if (dev_add_entry(name, dnp, type, &typeinfo, NULL, NULL, &new_dev) == 0) { + new_dev->de_dnp->dn_gid = gid; + new_dev->de_dnp->dn_uid = uid; + new_dev->de_dnp->dn_mode |= perms; + new_dev->de_dnp->dn_clone = clone; +#if CONFIG_MACF + mac_devfs_label_associate_device(dev, new_dev->de_dnp, buff); +#endif + devfs_propogate(dnp->dn_typeinfo.Dir.myname, new_dev, &event_log); + } + } out: - (void) thread_funnel_set(kernel_flock, funnel_state); + DEVFS_UNLOCK(); + + devfs_bulk_notify(&event_log); + devfs_release_event_log(&event_log, need_free); return new_dev; } @@ -1142,21 +1672,24 @@ out: int devfs_make_link(void *original, char *fmt, ...) { - devdirent_t * new_dev = NULL; - devdirent_t * orig = (devdirent_t *) original; - devnode_t * dirnode; /* devnode for parent directory */ + devdirent_t * new_dev = NULL; + devdirent_t * orig = (devdirent_t *) original; + devnode_t * dirnode; /* devnode for parent directory */ + struct devfs_event_log event_log; + uint32_t log_count; va_list ap; char *p, buf[256]; /* XXX */ - int i; - boolean_t funnel_state; + size_t i; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + DEVFS_LOCK(); if (!devfs_ready) { + DEVFS_UNLOCK(); printf("devfs_make_link: not ready for devices!\n"); - goto out; + return -1; } + DEVFS_UNLOCK(); va_start(ap, fmt); vsnprintf(buf, sizeof(buf), fmt, ap); @@ -1164,28 +1697,52 @@ devfs_make_link(void *original, char *fmt, ...) p = NULL; - for(i=strlen(buf); i>0; i--) - if(buf[i] == '/') { - p=&buf[i]; - buf[i]=0; - break; + for (i = strlen(buf); i > 0; i--) { + if (buf[i] == '/') { + p = &buf[i]; + buf[i] = 0; + break; } - DEVFS_LOCK(0); + } + + /* + * One slot for each directory, one for each devnode + * whose link count changes + */ + log_count = devfs_nmountplanes * 2; +wrongsize: + if (devfs_init_event_log(&event_log, log_count, NULL) != 0) { + /* No lock held, no allocations done, can just return */ + return -1; + } + + DEVFS_LOCK(); + + if (log_count < devfs_nmountplanes) { + DEVFS_UNLOCK(); + devfs_release_event_log(&event_log, 1); + log_count = log_count * 2; + goto wrongsize; + } + if (p) { - *p++ = '\0'; - if (dev_finddir(buf, NULL, CREATE, &dirnode) - || dev_add_name(p, dirnode, NULL, orig->de_dnp, &new_dev)) - goto fail; + *p++ = '\0'; + + if (dev_finddir(buf, NULL, DEVFS_CREATE, &dirnode, &event_log) + || dev_add_name(p, dirnode, NULL, orig->de_dnp, &new_dev)) { + goto fail; + } } else { - if (dev_finddir("", NULL, CREATE, &dirnode) - || dev_add_name(buf, dirnode, NULL, orig->de_dnp, &new_dev)) - goto fail; + if (dev_finddir("", NULL, DEVFS_CREATE, &dirnode, &event_log) + || dev_add_name(buf, dirnode, NULL, orig->de_dnp, &new_dev)) { + goto fail; + } } - devfs_propogate(dirnode->dn_typeinfo.Dir.myname, new_dev); + devfs_propogate(dirnode->dn_typeinfo.Dir.myname, new_dev, &event_log); fail: - DEVFS_UNLOCK(0); -out: - (void) thread_funnel_set(kernel_flock, funnel_state); - return ((new_dev != NULL) ? 0 : -1); -} + DEVFS_UNLOCK(); + devfs_bulk_notify(&event_log); + devfs_release_event_log(&event_log, 1); + return (new_dev != NULL) ? 0 : -1; +}