X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/378393581903b274cb7a4d18e0d978071a6b592d..3903760236c30e3b5ace7a4eefac3a269d68957c:/bsd/miscfs/devfs/devfs_tree.c diff --git a/bsd/miscfs/devfs/devfs_tree.c b/bsd/miscfs/devfs/devfs_tree.c index ff61c3d5a..21912549a 100644 --- a/bsd/miscfs/devfs/devfs_tree.c +++ b/bsd/miscfs/devfs/devfs_tree.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -47,6 +53,12 @@ * * devfs_tree.c */ +/* + * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. + */ /* * HISTORY @@ -84,35 +96,74 @@ #include #include #include -#include +#include #include - +#include +#define BSD_KERNEL_PRIVATE 1 /* devfs_make_link() prototype */ #include "devfs.h" #include "devfsdefs.h" -static void devfs_release_busy(devnode_t *); +#if CONFIG_MACF +#include +#endif + +#if FDESC +#include "fdesc.h" +#endif + +typedef struct devfs_vnode_event { + vnode_t dve_vp; + uint32_t dve_vid; + uint32_t dve_events; +} *devfs_vnode_event_t; + +/* + * Size of stack buffer (fast path) for notifications. If + * the number of mounts is small, no need to malloc a buffer. + */ +#define NUM_STACK_ENTRIES 5 + +typedef struct devfs_event_log { + size_t del_max; + size_t del_used; + devfs_vnode_event_t del_entries; +} *devfs_event_log_t; + + static void dev_free_hier(devdirent_t *); -static int devfs_propogate(devdirent_t *, devdirent_t *); -static int dev_finddir(char *, devnode_t *, int, devnode_t **); +static int devfs_propogate(devdirent_t *, devdirent_t *, devfs_event_log_t); +static int dev_finddir(const char *, devnode_t *, int, devnode_t **, devfs_event_log_t); static int dev_dup_entry(devnode_t *, devdirent_t *, devdirent_t **, struct devfsmount *); +void devfs_ref_node(devnode_t *); +void devfs_rele_node(devnode_t *); +static void devfs_record_event(devfs_event_log_t, devnode_t*, uint32_t); +static int devfs_init_event_log(devfs_event_log_t, uint32_t, devfs_vnode_event_t); +static void devfs_release_event_log(devfs_event_log_t, int); +static void devfs_bulk_notify(devfs_event_log_t); +static devdirent_t *devfs_make_node_internal(dev_t, devfstype_t type, uid_t, gid_t, int, + int (*clone)(dev_t dev, int action), const char *fmt, va_list ap); lck_grp_t * devfs_lck_grp; lck_grp_attr_t * devfs_lck_grp_attr; lck_attr_t * devfs_lck_attr; lck_mtx_t devfs_mutex; +lck_mtx_t devfs_attr_mutex; devdirent_t * dev_root = NULL; /* root of backing tree */ struct devfs_stats devfs_stats; /* hold stats */ +static ino_t devfs_unique_fileno = 0; + #ifdef HIDDEN_MOUNTPOINT static struct mount *devfs_hidden_mount; #endif /* HIDDEN_MOINTPOINT */ static int devfs_ready = 0; +static uint32_t devfs_nmountplanes = 0; /* The first plane is not used for a mount */ -#define NOCREATE FALSE -#define CREATE TRUE +#define DEVFS_NOCREATE FALSE +#define DEVFS_CREATE TRUE /* * Set up the root directory node in the backing plane @@ -127,16 +178,15 @@ static int devfs_ready = 0; int devfs_sinit(void) { - int error; + int error; - devfs_lck_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(devfs_lck_grp_attr); + devfs_lck_grp_attr = lck_grp_attr_alloc_init(); devfs_lck_grp = lck_grp_alloc_init("devfs_lock", devfs_lck_grp_attr); devfs_lck_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(devfs_lck_attr); lck_mtx_init(&devfs_mutex, devfs_lck_grp, devfs_lck_attr); + lck_mtx_init(&devfs_attr_mutex, devfs_lck_grp, devfs_lck_attr); DEVFS_LOCK(); error = dev_add_entry("root", NULL, DEV_DIR, NULL, NULL, NULL, &dev_root); @@ -154,15 +204,26 @@ devfs_sinit(void) TAILQ_INIT(&devfs_hidden_mount->mnt_vnodelist); TAILQ_INIT(&devfs_hidden_mount->mnt_workerqueue); TAILQ_INIT(&devfs_hidden_mount->mnt_newvnodes); +#if CONFIG_MACF + mac_mount_label_init(devfs_hidden_mount); + mac_mount_label_associate(vfs_context_kernel(), devfs_hidden_mount); +#endif /* Initialize the default IO constraints */ mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; + mp->mnt_ioflags = 0; + mp->mnt_realrootvp = NULLVP; + mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL; devfs_mount(devfs_hidden_mount,"dummy",NULL,NULL,NULL); dev_root->de_dnp->dn_dvm = (struct devfsmount *)devfs_hidden_mount->mnt_data; #endif /* HIDDEN_MOUNTPOINT */ +#if CONFIG_MACF + mac_devfs_label_associate_directory("/", strlen("/"), + dev_root->de_dnp, "/"); +#endif devfs_ready = 1; return (0); } @@ -182,7 +243,7 @@ devfs_sinit(void) * called with DEVFS_LOCK held ***************************************************************/ devdirent_t * -dev_findname(devnode_t * dir, char *name) +dev_findname(devnode_t * dir, const char *name) { devdirent_t * newfp; if (dir->dn_type != DEV_DIR) return 0;/*XXX*/ /* printf?*/ @@ -203,7 +264,7 @@ dev_findname(devnode_t * dir, char *name) while(newfp) { - if(!(strcmp(name,newfp->de_name))) + if(!(strncmp(name, newfp->de_name, sizeof(newfp->de_name)))) return newfp; newfp = newfp->de_next; } @@ -212,7 +273,7 @@ dev_findname(devnode_t * dir, char *name) /*********************************************************************** * Given a starting node (0 for root) and a pathname, return the node - * for the end item on the path. It MUST BE A DIRECTORY. If the 'CREATE' + * for the end item on the path. It MUST BE A DIRECTORY. If the 'DEVFS_CREATE' * option is true, then create any missing nodes in the path and create * and return the final node as well. * This is used to set up a directory, before making nodes in it.. @@ -220,14 +281,18 @@ dev_findname(devnode_t * dir, char *name) * called with DEVFS_LOCK held ***********************************************************************/ static int -dev_finddir(char * path, +dev_finddir(const char * path, devnode_t * dirnode, int create, - devnode_t * * dn_pp) + devnode_t * * dn_pp, + devfs_event_log_t delp) { devnode_t * dnp = NULL; int error = 0; - char * scan; + const char * scan; +#if CONFIG_MACF + char fullpath[DEVMAXPATHSIZE]; +#endif if (!dirnode) /* dirnode == NULL means start at root */ @@ -239,6 +304,9 @@ dev_finddir(char * path, if (strlen(path) > (DEVMAXPATHSIZE - 1)) return ENAMETOOLONG; +#if CONFIG_MACF + strlcpy (fullpath, path, DEVMAXPATHSIZE); +#endif scan = path; while (*scan == '/') @@ -249,7 +317,7 @@ dev_finddir(char * path, while (1) { char component[DEVMAXPATHSIZE]; devdirent_t * dirent_p; - char * start; + const char * start; if (*scan == 0) { /* we hit the end of the string, we're done */ @@ -260,8 +328,7 @@ dev_finddir(char * path, while (*scan != '/' && *scan) scan++; - strncpy(component, start, scan - start); - component[ scan - start ] = '\0'; + strlcpy(component, start, scan - start); if (*scan == '/') scan++; @@ -283,7 +350,13 @@ dev_finddir(char * path, if (error) break; dnp = dirent_p->de_dnp; - devfs_propogate(dirnode->dn_typeinfo.Dir.myname, dirent_p); +#if CONFIG_MACF + mac_devfs_label_associate_directory( + dirnode->dn_typeinfo.Dir.myname->de_name, + strlen(dirnode->dn_typeinfo.Dir.myname->de_name), + dnp, fullpath); +#endif + devfs_propogate(dirnode->dn_typeinfo.Dir.myname, dirent_p, delp); } dirnode = dnp; /* continue relative to this directory */ } @@ -301,7 +374,7 @@ dev_finddir(char * path, * called with DEVFS_LOCK held ***********************************************************************/ int -dev_add_name(char * name, devnode_t * dirnode, __unused devdirent_t * back, +dev_add_name(const char * name, devnode_t * dirnode, __unused devdirent_t * back, devnode_t * dnp, devdirent_t * *dirent_pp) { devdirent_t * dirent_p = NULL; @@ -386,7 +459,7 @@ dev_add_name(char * name, devnode_t * dirnode, __unused devdirent_t * back, /* * put the name into the directory entry. */ - strcpy(dirent_p->de_name, name); + strlcpy(dirent_p->de_name, name, DEVMAXNAMESIZE); /* @@ -420,7 +493,7 @@ dev_add_name(char * name, devnode_t * dirnode, __unused devdirent_t * back, * reused. (Is a DIR, or we select SPLIT_DEVS at compile time) * typeinfo gives us info to make our node if we don't have a prototype. * If typeinfo is null and proto exists, then the typeinfo field of - * the proto is used intead in the CREATE case. + * the proto is used intead in the DEVFS_CREATE case. * note the 'links' count is 0 (except if a dir) * but it is only cleared on a transition * so this is ok till we link it to something @@ -482,6 +555,10 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, *(dnp->dn_prevsiblingp) = dnp; dnp->dn_nextsibling = proto; proto->dn_prevsiblingp = &(dnp->dn_nextsibling); +#if CONFIG_MACF + mac_devfs_label_init(dnp); + mac_devfs_label_copy(proto->dn_label, dnp->dn_label); +#endif } else { struct timeval tv; @@ -496,8 +573,14 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, dnp->dn_atime.tv_sec = tv.tv_sec; dnp->dn_mtime.tv_sec = tv.tv_sec; dnp->dn_ctime.tv_sec = tv.tv_sec; +#if CONFIG_MACF + mac_devfs_label_init(dnp); +#endif } dnp->dn_dvm = dvm; + dnp->dn_refcount = 0; + dnp->dn_ino = devfs_unique_fileno; + devfs_unique_fileno++; /* * fill out the dev node according to type @@ -538,9 +621,8 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, FREE(dnp,M_DEVFSNODE); return ENOMEM; } - strncpy(dnp->dn_typeinfo.Slnk.name, typeinfo->Slnk.name, - typeinfo->Slnk.namelen); - dnp->dn_typeinfo.Slnk.name[typeinfo->Slnk.namelen] = '\0'; + strlcpy(dnp->dn_typeinfo.Slnk.name, typeinfo->Slnk.name, + typeinfo->Slnk.namelen + 1); dnp->dn_typeinfo.Slnk.namelen = typeinfo->Slnk.namelen; DEVFS_INCR_STRINGSPACE(dnp->dn_typeinfo.Slnk.namelen + 1); dnp->dn_ops = &devfs_vnodeop_p; @@ -555,6 +637,15 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, dnp->dn_ops = &devfs_spec_vnodeop_p; dnp->dn_typeinfo.dev = typeinfo->dev; break; + + #if FDESC + /* /dev/fd is special */ + case DEV_DEVFD: + dnp->dn_ops = &devfs_devfd_vnodeop_p; + dnp->dn_mode |= 0555; /* default perms */ + break; + + #endif /* FDESC */ default: return EINVAL; } @@ -571,10 +662,9 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, void devnode_free(devnode_t * dnp) { - if (dnp->dn_lflags & DN_BUSY) { - dnp->dn_lflags |= DN_DELETE; - return; - } +#if CONFIG_MACF + mac_devfs_label_destroy(dnp); +#endif if (dnp->dn_type == DEV_SLNK) { DEVFS_DECR_STRINGSPACE(dnp->dn_typeinfo.Slnk.namelen + 1); FREE(dnp->dn_typeinfo.Slnk.name, M_DEVFSNODE); @@ -599,11 +689,13 @@ devfs_dn_free(devnode_t * dnp) dnp->dn_nextsibling->dn_prevsiblingp = prevp; } - if (dnp->dn_vn == NULL) { - devnode_free(dnp); /* no accesses/references */ + + /* Can only free if there are no references; otherwise, wait for last vnode to be reclaimed */ + if (dnp->dn_refcount == 0) { + devnode_free(dnp); } else { - dnp->dn_delete = TRUE; + dnp->dn_lflags |= DN_DELETE; } } } @@ -628,7 +720,7 @@ devfs_dn_free(devnode_t * dnp) * called with DEVFS_LOCK held ***********************************************************************/ static int -devfs_propogate(devdirent_t * parent,devdirent_t * child) +devfs_propogate(devdirent_t * parent,devdirent_t * child, devfs_event_log_t delp) { int error; devdirent_t * newnmp; @@ -636,6 +728,12 @@ devfs_propogate(devdirent_t * parent,devdirent_t * child) devnode_t * pdnp = parent->de_dnp; devnode_t * adnp = parent->de_dnp; int type = child->de_dnp->dn_type; + uint32_t events; + + events = (dnp->dn_type == DEV_DIR ? VNODE_EVENT_DIR_CREATED : VNODE_EVENT_FILE_CREATED); + if (delp != NULL) { + devfs_record_event(delp, pdnp, events); + } /*********************************************** * Find the other instances of the parent node @@ -653,11 +751,45 @@ devfs_propogate(devdirent_t * parent,devdirent_t * child) NULL, dnp, adnp->dn_dvm, &newnmp)) != 0) { printf("duplicating %s failed\n",child->de_name); + } else { + if (delp != NULL) { + devfs_record_event(delp, adnp, events); + + /* + * Slightly subtle. We're guaranteed that there will + * only be a vnode hooked into this devnode if we're creating + * a new link to an existing node; otherwise, the devnode is new + * and no one can have looked it up yet. If we're making a link, + * then the buffer is large enough for two nodes in each + * plane; otherwise, there's no vnode and this call will + * do nothing. + */ + devfs_record_event(delp, newnmp->de_dnp, VNODE_EVENT_LINK); + } } } return 0; /* for now always succeed */ } +static uint32_t +remove_notify_count(devnode_t *dnp) +{ + uint32_t notify_count = 0; + devnode_t *dnp2; + + /* + * Could need to notify for one removed node on each mount and + * one parent for each such node. + */ + notify_count = devfs_nmountplanes; + notify_count += dnp->dn_links; + for (dnp2 = dnp->dn_nextsibling; dnp2 != dnp; dnp2 = dnp2->dn_nextsibling) { + notify_count += dnp2->dn_links; + } + + return notify_count; + +} /*********************************************************************** * remove all instances of this devicename [for backing nodes..] @@ -675,7 +807,12 @@ devfs_remove(void *dirent_p) devnode_t * dnp = ((devdirent_t *)dirent_p)->de_dnp; devnode_t * dnp2; boolean_t lastlink; - + struct devfs_event_log event_log; + uint32_t log_count = 0; + int do_notify = 0; + int need_free = 0; + struct devfs_vnode_event stackbuf[NUM_STACK_ENTRIES]; + DEVFS_LOCK(); if (!devfs_ready) { @@ -683,6 +820,37 @@ devfs_remove(void *dirent_p) goto out; } + log_count = remove_notify_count(dnp); + + if (log_count > NUM_STACK_ENTRIES) { + uint32_t new_count; +wrongsize: + DEVFS_UNLOCK(); + if (devfs_init_event_log(&event_log, log_count, NULL) == 0) { + do_notify = 1; + need_free = 1; + } + DEVFS_LOCK(); + + new_count = remove_notify_count(dnp); + if (need_free && (new_count > log_count)) { + devfs_release_event_log(&event_log, 1); + need_free = 0; + do_notify = 0; + log_count = log_count * 2; + goto wrongsize; + } + } else { + if (devfs_init_event_log(&event_log, NUM_STACK_ENTRIES, &stackbuf[0]) == 0) { + do_notify = 1; + } + } + + /* This file has been deleted */ + if (do_notify != 0) { + devfs_record_event(&event_log, dnp, VNODE_EVENT_DELETE); + } + /* keep removing the next sibling till only we exist. */ while ((dnp2 = dnp->dn_nextsibling) != dnp) { @@ -693,9 +861,19 @@ devfs_remove(void *dirent_p) dnp->dn_nextsibling->dn_prevsiblingp = &(dnp->dn_nextsibling); dnp2->dn_nextsibling = dnp2; dnp2->dn_prevsiblingp = &(dnp2->dn_nextsibling); + + /* This file has been deleted in this plane */ + if (do_notify != 0) { + devfs_record_event(&event_log, dnp2, VNODE_EVENT_DELETE); + } + if (dnp2->dn_linklist) { do { lastlink = (1 == dnp2->dn_links); + /* Each parent of a link to this file has lost a child in this plane */ + if (do_notify != 0) { + devfs_record_event(&event_log, dnp2->dn_linklist->de_parent, VNODE_EVENT_FILE_REMOVED); + } dev_free_name(dnp2->dn_linklist); } while (!lastlink); } @@ -709,11 +887,19 @@ devfs_remove(void *dirent_p) if (dnp->dn_linklist) { do { lastlink = (1 == dnp->dn_links); + /* Each parent of a link to this file has lost a child */ + if (do_notify != 0) { + devfs_record_event(&event_log, dnp->dn_linklist->de_parent, VNODE_EVENT_FILE_REMOVED); + } dev_free_name(dnp->dn_linklist); } while (!lastlink); } out: DEVFS_UNLOCK(); + if (do_notify != 0) { + devfs_bulk_notify(&event_log); + devfs_release_event_log(&event_log, need_free); + } return ; } @@ -737,6 +923,7 @@ dev_dup_plane(struct devfsmount *devfs_mp_p) if ((error = dev_dup_entry(NULL, dev_root, &new, devfs_mp_p))) return error; devfs_mp_p->plane_root = new; + devfs_nmountplanes++; return error; } @@ -758,6 +945,11 @@ devfs_free_plane(struct devfsmount *devfs_mp_p) dev_free_name(dirent_p); } devfs_mp_p->plane_root = NULL; + devfs_nmountplanes--; + + if (devfs_nmountplanes > (devfs_nmountplanes+1)) { + panic("plane count wrapped around.\n"); + } } @@ -860,12 +1052,12 @@ dev_free_name(devdirent_t * dirent_p) if(dnp->dn_linklist == dirent_p) { dnp->dn_linklist = dirent_p->de_nextlink; } - dirent_p->de_nextlink->de_prevlinkp - = dirent_p->de_prevlinkp; - *dirent_p->de_prevlinkp = dirent_p->de_nextlink; } devfs_dn_free(dnp); } + + dirent_p->de_nextlink->de_prevlinkp = dirent_p->de_prevlinkp; + *(dirent_p->de_prevlinkp) = dirent_p->de_nextlink; /* * unlink ourselves from the directory on this plane @@ -924,7 +1116,11 @@ dev_free_hier(devdirent_t * dirent_p) * associated, or get a new one and associate it with the dev_node * * called with DEVFS_LOCK held - ***************************************************************/ + * + * If an error is returned, then the dnp may have been freed (we + * raced with a delete and lost). A devnode should not be accessed + * after devfs_dntovn() fails. + ****************************************************************/ int devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, __unused struct proc * p) { @@ -933,13 +1129,26 @@ devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, __unused struct proc * p) struct vnode_fsparam vfsp; enum vtype vtype = 0; int markroot = 0; + int nretries = 0; + int n_minor = DEVFS_CLONE_ALLOC; /* new minor number for clone device */ + + /* + * We should never come in and find that our devnode has been marked for delete. + * The lookup should have held the lock from entry until now; it should not have + * been able to find a removed entry. Any other pathway would have just created + * the devnode and come here without dropping the devfs lock, so no one would + * have a chance to delete. + */ + if (dnp->dn_lflags & DN_DELETE) { + panic("devfs_dntovn: DN_DELETE set on a devnode upon entry."); + } + + devfs_ref_node(dnp); retry: *vn_pp = NULL; vn_p = dnp->dn_vn; - dnp->dn_lflags |= DN_BUSY; - if (vn_p) { /* already has a vnode */ uint32_t vid; @@ -947,7 +1156,16 @@ retry: DEVFS_UNLOCK(); - error = vnode_getwithvid(vn_p, vid); + /* + * We want to use the drainok variant of vnode_getwithvid + * because we _don't_ want to get an iocount if the vnode is + * is blocked in vnode_drain as it can cause infinite + * loops in vn_open_auth. While in use vnodes are typically + * only reclaimed on forced unmounts, In use devfs tty vnodes + * can be quite frequently reclaimed by revoke(2) or by the + * exit of a controlling process. + */ + error = vnode_getwithvid_drainok(vn_p, vid); DEVFS_LOCK(); @@ -964,21 +1182,49 @@ retry: */ vnode_put(vn_p); } + + /* + * This entry is no longer in the namespace. This is only + * possible for lookup: no other path would not find an existing + * vnode. Therefore, ENOENT is a valid result. + */ + error = ENOENT; + } else if (error == ENODEV) { + /* + * The Filesystem is getting unmounted. + */ + error = ENOENT; + } else if (error && (nretries < DEV_MAX_VNODE_RETRY)) { /* - * set the error to EAGAIN - * which will cause devfs_lookup - * to retry this node + * If we got an error from vnode_getwithvid, it means + * we raced with a recycle and lost i.e. we asked for + * an iocount only after vnode_drain had been entered + * for the vnode and returned with an error only after + * devfs_reclaim was called on the vnode. devfs_reclaim + * sets dn_vn to NULL but while we were waiting to + * reacquire DEVFS_LOCK, another vnode might have gotten + * associated with the dnp. In either case, we need to + * retry otherwise we will end up returning an ENOENT + * for this lookup but the next lookup will succeed + * because it creates a new vnode (or a racing lookup + * created a new vnode already). */ - error = EAGAIN; + error = 0; + nretries++; + goto retry; } if ( !error) *vn_pp = vn_p; - devfs_release_busy(dnp); - - return error; + goto out; } + /* + * If we get here, then we've beaten any deletes; + * if someone sets DN_DELETE during a subsequent drop + * of the devfs lock, we'll still vend a vnode. + */ + if (dnp->dn_lflags & DN_CREATE) { dnp->dn_lflags |= DN_CREATEWAIT; msleep(&dnp->dn_lflags, &devfs_mutex, PRIBIO, 0 , 0); @@ -1001,6 +1247,11 @@ retry: case DEV_CDEV: vtype = (dnp->dn_type == DEV_BDEV) ? VBLK : VCHR; break; +#if FDESC + case DEV_DEVFD: + vtype = VDIR; + break; +#endif /* FDESC */ } vfsp.vnfs_mp = dnp->dn_dvm->mount; vfsp.vnfs_vtype = vtype; @@ -1010,10 +1261,28 @@ retry: vfsp.vnfs_cnp = 0; vfsp.vnfs_vops = *(dnp->dn_ops); - if (vtype == VBLK || vtype == VCHR) + if (vtype == VBLK || vtype == VCHR) { + /* + * Ask the clone minor number function for a new minor number + * to use for the next device instance. If an administative + * limit has been reached, this function will return -1. + */ + if (dnp->dn_clone != NULL) { + int n_major = major(dnp->dn_typeinfo.dev); + + n_minor = (*dnp->dn_clone)(dnp->dn_typeinfo.dev, DEVFS_CLONE_ALLOC); + if (n_minor == -1) { + error = ENOMEM; + goto out; + } + + vfsp.vnfs_rdev = makedev(n_major, n_minor);; + } else { vfsp.vnfs_rdev = dnp->dn_typeinfo.dev; - else + } + } else { vfsp.vnfs_rdev = 0; + } vfsp.vnfs_filesize = 0; vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE; /* Tag system files */ @@ -1023,42 +1292,91 @@ retry: DEVFS_UNLOCK(); error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vn_p); + + /* Do this before grabbing the lock */ + if (error == 0) { + vnode_setneedinactive(vn_p); + } DEVFS_LOCK(); if (error == 0) { - if ((dnp->dn_vn)) { - panic("devnode already has a vnode?"); - } else { - dnp->dn_vn = vn_p; - *vn_pp = vn_p; vnode_settag(vn_p, VT_DEVFS); - } + + if ((dnp->dn_clone != NULL) && (dnp->dn_vn != NULLVP) ) + panic("devfs_dntovn: cloning device with a vnode?\n"); + + *vn_pp = vn_p; + + /* + * Another vnode that has this devnode as its v_data. + * This reference, unlike the one taken at the start + * of the function, persists until a VNOP_RECLAIM + * comes through for this vnode. + */ + devfs_ref_node(dnp); + + /* + * A cloned vnode is not hooked into the devnode; every lookup + * gets a new vnode. + */ + if (dnp->dn_clone == NULL) { + dnp->dn_vn = vn_p; + } + } else if (n_minor != DEVFS_CLONE_ALLOC) { + /* + * If we failed the create, we need to release the cloned minor + * back to the free list. In general, this is only useful if + * the clone function results in a state change in the cloned + * device for which the minor number was obtained. If we get + * past this point withouth falling into this case, it's + * assumed that any state to be released will be released when + * the vnode is dropped, instead. + */ + (void)(*dnp->dn_clone)(dnp->dn_typeinfo.dev, DEVFS_CLONE_FREE); } dnp->dn_lflags &= ~DN_CREATE; - if (dnp->dn_lflags & DN_CREATEWAIT) { dnp->dn_lflags &= ~DN_CREATEWAIT; wakeup(&dnp->dn_lflags); } - devfs_release_busy(dnp); +out: + /* + * Release the reference we took to prevent deletion while we weren't holding the lock. + * If not returning success, then dropping this reference could delete the devnode; + * no one should access a devnode after a call to devfs_dntovn fails. + */ + devfs_rele_node(dnp); return error; } +/* + * Increment refcount on a devnode; prevents free of the node + * while the devfs lock is not held. + */ +void +devfs_ref_node(devnode_t *dnp) +{ + dnp->dn_refcount++; +} -/*********************************************************************** - * called with DEVFS_LOCK held - ***********************************************************************/ -static void -devfs_release_busy(devnode_t *dnp) { - - dnp->dn_lflags &= ~DN_BUSY; +/* + * Release a reference on a devnode. If the devnode is marked for + * free and the refcount is dropped to zero, do the free. + */ +void +devfs_rele_node(devnode_t *dnp) +{ + dnp->dn_refcount--; + if (dnp->dn_refcount < 0) { + panic("devfs_rele_node: devnode with a negative refcount!\n"); + } else if ((dnp->dn_refcount == 0) && (dnp->dn_lflags & DN_DELETE)) { + devnode_free(dnp); + } - if (dnp->dn_lflags & DN_DELETE) - devnode_free(dnp); } /*********************************************************************** @@ -1068,7 +1386,7 @@ devfs_release_busy(devnode_t *dnp) { * called with DEVFS_LOCK held ***********************************************************************/ int -dev_add_entry(char *name, devnode_t * parent, int type, devnode_type_t * typeinfo, +dev_add_entry(const char *name, devnode_t * parent, int type, devnode_type_t * typeinfo, devnode_t * proto, struct devfsmount *dvm, devdirent_t * *nm_pp) { devnode_t * dnp; @@ -1091,6 +1409,69 @@ dev_add_entry(char *name, devnode_t * parent, int type, devnode_type_t * typeinf return error; } +static void +devfs_bulk_notify(devfs_event_log_t delp) +{ + uint32_t i; + for (i = 0; i < delp->del_used; i++) { + devfs_vnode_event_t dvep = &delp->del_entries[i]; + if (vnode_getwithvid(dvep->dve_vp, dvep->dve_vid) == 0) { + vnode_notify(dvep->dve_vp, dvep->dve_events, NULL); + vnode_put(dvep->dve_vp); + } + } +} + +static void +devfs_record_event(devfs_event_log_t delp, devnode_t *dnp, uint32_t events) +{ + if (delp->del_used >= delp->del_max) { + panic("devfs event log overflowed.\n"); + } + + /* Can only notify for nodes that have an associated vnode */ + if (dnp->dn_vn != NULLVP && vnode_ismonitored(dnp->dn_vn)) { + devfs_vnode_event_t dvep = &delp->del_entries[delp->del_used]; + dvep->dve_vp = dnp->dn_vn; + dvep->dve_vid = vnode_vid(dnp->dn_vn); + dvep->dve_events = events; + delp->del_used++; + } +} + +static int +devfs_init_event_log(devfs_event_log_t delp, uint32_t count, devfs_vnode_event_t buf) +{ + devfs_vnode_event_t dvearr; + + if (buf == NULL) { + MALLOC(dvearr, devfs_vnode_event_t, count * sizeof(struct devfs_vnode_event), M_TEMP, M_WAITOK | M_ZERO); + if (dvearr == NULL) { + return ENOMEM; + } + } else { + dvearr = buf; + } + + delp->del_max = count; + delp->del_used = 0; + delp->del_entries = dvearr; + return 0; +} + +static void +devfs_release_event_log(devfs_event_log_t delp, int need_free) +{ + if (delp->del_entries == NULL) { + panic("Free of devfs notify info that has not been intialized.\n"); + } + + if (need_free) { + FREE(delp->del_entries, M_TEMP); + } + + delp->del_entries = NULL; +} /* * Function: devfs_make_node @@ -1103,38 +1484,100 @@ dev_add_entry(char *name, devnode_t * parent, int type, devnode_type_t * typeinf * chrblk - block or character device (DEVFS_CHAR or DEVFS_BLOCK) * uid, gid - ownership * perms - permissions + * clone - minor number cloning function * fmt, ... - path format string with printf args to format the path name * Returns: * A handle to a device node if successful, NULL otherwise. */ void * -devfs_make_node(dev_t dev, int chrblk, uid_t uid, - gid_t gid, int perms, const char *fmt, ...) +devfs_make_node_clone(dev_t dev, int chrblk, uid_t uid, + gid_t gid, int perms, int (*clone)(dev_t dev, int action), + const char *fmt, ...) { devdirent_t * new_dev = NULL; - devnode_t * dnp; /* devnode for parent directory */ - devnode_type_t typeinfo; - - char *name, *path, buf[256]; /* XXX */ - int i; + devfstype_t type; va_list ap; + switch (chrblk) { + case DEVFS_CHAR: + type = DEV_CDEV; + break; + case DEVFS_BLOCK: + type = DEV_BDEV; + break; + default: + goto out; + } - DEVFS_LOCK(); + va_start(ap, fmt); + new_dev = devfs_make_node_internal(dev, type, uid, gid, perms, clone, fmt, ap); + va_end(ap); +out: + return new_dev; +} + + +/* + * Function: devfs_make_node + * + * Purpose + * Create a device node with the given pathname in the devfs namespace. + * + * Parameters: + * dev - the dev_t value to associate + * chrblk - block or character device (DEVFS_CHAR or DEVFS_BLOCK) + * uid, gid - ownership + * perms - permissions + * fmt, ... - path format string with printf args to format the path name + * Returns: + * A handle to a device node if successful, NULL otherwise. + */ +void * +devfs_make_node(dev_t dev, int chrblk, uid_t uid, + gid_t gid, int perms, const char *fmt, ...) +{ + devdirent_t * new_dev = NULL; + devfstype_t type; + va_list ap; - if (!devfs_ready) { - printf("devfs_make_node: not ready for devices!\n"); - goto out; - } if (chrblk != DEVFS_CHAR && chrblk != DEVFS_BLOCK) goto out; - DEVFS_UNLOCK(); + type = (chrblk == DEVFS_BLOCK ? DEV_BDEV : DEV_CDEV); va_start(ap, fmt); - vsnprintf(buf, sizeof(buf), fmt, ap); + new_dev = devfs_make_node_internal(dev, type, uid, gid, perms, NULL, fmt, ap); va_end(ap); + +out: + return new_dev; +} +static devdirent_t * +devfs_make_node_internal(dev_t dev, devfstype_t type, uid_t uid, + gid_t gid, int perms, int (*clone)(dev_t dev, int action), const char *fmt, va_list ap) +{ + devdirent_t * new_dev = NULL; + devnode_t * dnp; + devnode_type_t typeinfo; + + char *name, buf[256]; /* XXX */ + const char *path; +#if CONFIG_MACF + char buff[sizeof(buf)]; +#endif + int i; + uint32_t log_count; + struct devfs_event_log event_log; + struct devfs_vnode_event stackbuf[NUM_STACK_ENTRIES]; + int need_free = 0; + + vsnprintf(buf, sizeof(buf), fmt, ap); + +#if CONFIG_MACF + bcopy(buf, buff, sizeof(buff)); + buff[sizeof(buff)-1] = 0; +#endif name = NULL; for(i=strlen(buf); i>0; i--) @@ -1151,23 +1594,55 @@ devfs_make_node(dev_t dev, int chrblk, uid_t uid, name = buf; path = "/"; } + + log_count = devfs_nmountplanes; + if (log_count > NUM_STACK_ENTRIES) { +wrongsize: + need_free = 1; + if (devfs_init_event_log(&event_log, log_count, NULL) != 0) { + return NULL; + } + } else { + need_free = 0; + log_count = NUM_STACK_ENTRIES; + if (devfs_init_event_log(&event_log, log_count, &stackbuf[0]) != 0) { + return NULL; + } + } + DEVFS_LOCK(); + if (log_count < devfs_nmountplanes) { + DEVFS_UNLOCK(); + devfs_release_event_log(&event_log, need_free); + log_count = log_count * 2; + goto wrongsize; + } + + if (!devfs_ready) { + printf("devfs_make_node: not ready for devices!\n"); + goto out; + } /* find/create directory path ie. mkdir -p */ - if (dev_finddir(path, NULL, CREATE, &dnp) == 0) { + if (dev_finddir(path, NULL, DEVFS_CREATE, &dnp, &event_log) == 0) { typeinfo.dev = dev; - if (dev_add_entry(name, dnp, - (chrblk == DEVFS_CHAR) ? DEV_CDEV : DEV_BDEV, - &typeinfo, NULL, NULL, &new_dev) == 0) { + if (dev_add_entry(name, dnp, type, &typeinfo, NULL, NULL, &new_dev) == 0) { new_dev->de_dnp->dn_gid = gid; new_dev->de_dnp->dn_uid = uid; new_dev->de_dnp->dn_mode |= perms; - devfs_propogate(dnp->dn_typeinfo.Dir.myname, new_dev); + new_dev->de_dnp->dn_clone = clone; +#if CONFIG_MACF + mac_devfs_label_associate_device(dev, new_dev->de_dnp, buff); +#endif + devfs_propogate(dnp->dn_typeinfo.Dir.myname, new_dev, &event_log); } } + out: DEVFS_UNLOCK(); + devfs_bulk_notify(&event_log); + devfs_release_event_log(&event_log, need_free); return new_dev; } @@ -1186,6 +1661,8 @@ devfs_make_link(void *original, char *fmt, ...) devdirent_t * new_dev = NULL; devdirent_t * orig = (devdirent_t *) original; devnode_t * dirnode; /* devnode for parent directory */ + struct devfs_event_log event_log; + uint32_t log_count; va_list ap; char *p, buf[256]; /* XXX */ @@ -1194,8 +1671,9 @@ devfs_make_link(void *original, char *fmt, ...) DEVFS_LOCK(); if (!devfs_ready) { + DEVFS_UNLOCK(); printf("devfs_make_link: not ready for devices!\n"); - goto out; + return -1; } DEVFS_UNLOCK(); @@ -1212,24 +1690,43 @@ devfs_make_link(void *original, char *fmt, ...) break; } } + + /* + * One slot for each directory, one for each devnode + * whose link count changes + */ + log_count = devfs_nmountplanes * 2; +wrongsize: + if (devfs_init_event_log(&event_log, log_count, NULL) != 0) { + /* No lock held, no allocations done, can just return */ + return -1; + } + DEVFS_LOCK(); + if (log_count < devfs_nmountplanes) { + DEVFS_UNLOCK(); + devfs_release_event_log(&event_log, 1); + log_count = log_count * 2; + goto wrongsize; + } + if (p) { *p++ = '\0'; - if (dev_finddir(buf, NULL, CREATE, &dirnode) + if (dev_finddir(buf, NULL, DEVFS_CREATE, &dirnode, &event_log) || dev_add_name(p, dirnode, NULL, orig->de_dnp, &new_dev)) goto fail; } else { - if (dev_finddir("", NULL, CREATE, &dirnode) + if (dev_finddir("", NULL, DEVFS_CREATE, &dirnode, &event_log) || dev_add_name(buf, dirnode, NULL, orig->de_dnp, &new_dev)) goto fail; } - devfs_propogate(dirnode->dn_typeinfo.Dir.myname, new_dev); + devfs_propogate(dirnode->dn_typeinfo.Dir.myname, new_dev, &event_log); fail: -out: DEVFS_UNLOCK(); + devfs_bulk_notify(&event_log); + devfs_release_event_log(&event_log, 1); return ((new_dev != NULL) ? 0 : -1); } -