]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/kern/posix_shm.c
xnu-6153.81.5.tar.gz
[apple/xnu.git] / bsd / kern / posix_shm.c
index 49e035a403e9916fc01815f5455ac2c292a3dc6f..29c89efb93bba6b668479a07c5950ee047ac27d5 100644 (file)
@@ -1,23 +1,29 @@
 /*
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
  *
  *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
- * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
- * 
- * @APPLE_LICENSE_HEADER_END@
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  *     Copyright (c) 1990, 1996-1998 Apple Computer, Inc.
  */
 /*
  *     Copyright (c) 1990, 1996-1998 Apple Computer, Inc.
  *     Created for MacOSX
  *
  */
  *     Created for MacOSX
  *
  */
+/*
+ * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
+ * support for mandatory and extensible security protections.  This notice
+ * is included in support of clause 2.2 (b) of the Apple Public License,
+ * Version 2.0.
+ */
 
 #include <sys/cdefs.h>
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 
 #include <sys/cdefs.h>
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
-#include <sys/file.h>
+#include <sys/file_internal.h>
 #include <sys/filedesc.h>
 #include <sys/stat.h>
 #include <sys/filedesc.h>
 #include <sys/stat.h>
-#include <sys/buf.h>
-#include <sys/proc.h>
+#include <sys/proc_internal.h>
+#include <sys/kauth.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/vnode.h>
+#include <sys/vnode_internal.h>
 #include <sys/ioctl.h>
 #include <sys/tty.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/ioctl.h>
 #include <sys/tty.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
-
-#include <bsm/audit_kernel.h>
+#include <sys/stat.h>
+#include <sys/sysproto.h>
+#include <sys/proc_info.h>
+#include <sys/posix_shm.h>
+#include <security/audit/audit.h>
+#include <stdbool.h>
+
+#if CONFIG_MACF
+#include <security/mac_framework.h>
+#endif
 
 #include <mach/mach_types.h>
 
 #include <mach/mach_types.h>
+#include <mach/mach_vm.h>
+#include <mach/vm_map.h>
 #include <mach/vm_prot.h>
 #include <mach/vm_inherit.h>
 #include <mach/kern_return.h>
 #include <mach/memory_object_control.h>
 
 #include <mach/vm_prot.h>
 #include <mach/vm_inherit.h>
 #include <mach/kern_return.h>
 #include <mach/memory_object_control.h>
 
+#include <vm/vm_map.h>
+#include <vm/vm_protos.h>
 
 
-#define        PSHMNAMLEN      31      /* maximum name segment length we bother with */
-
-struct pshminfo {
-       unsigned int    pshm_flags;
-       unsigned int    pshm_usecount;
-       off_t           pshm_length;
-       mode_t          pshm_mode;
-       uid_t           pshm_uid;
-       gid_t           pshm_gid;
-       char            pshm_name[PSHMNAMLEN + 1];      /* segment name */
-       void *          pshm_memobject;
-#if DIAGNOSTIC
-       unsigned int    pshm_readcount;
-       unsigned int    pshm_writecount;
-       struct proc *   pshm_proc;
-#endif /* DIAGNOSTIC */
-};
-#define PSHMINFO_NULL (struct pshminfo *)0
-
-#define        PSHM_NONE       1
-#define        PSHM_DEFINED    2
-#define        PSHM_ALLOCATED  4
-#define        PSHM_MAPPED     8
-#define        PSHM_INUSE      0x10
-#define        PSHM_REMOVED    0x20
-#define        PSHM_INCREATE   0x40
-#define        PSHM_INDELETE   0x80
-
-struct pshmcache {
-       LIST_ENTRY(pshmcache) pshm_hash;        /* hash chain */
-       struct  pshminfo *pshminfo;             /* vnode the name refers to */
-       int     pshm_nlen;              /* length of name */
-       char    pshm_name[PSHMNAMLEN + 1];      /* segment name */
-};
-#define PSHMCACHE_NULL (struct pshmcache *)0
-
-struct pshmstats {
-       long    goodhits;               /* hits that we can really use */
-       long    neghits;                /* negative hits that we can use */
-       long    badhits;                /* hits we must drop */
-       long    falsehits;              /* hits with id mismatch */
-       long    miss;           /* misses */
-       long    longnames;              /* long names that ignore cache */
-};
+#define f_flag f_fglob->fg_flag
+#define f_type f_fglob->fg_ops->fo_type
+#define f_msgcount f_fglob->fg_msgcount
+#define f_cred f_fglob->fg_cred
+#define f_ops f_fglob->fg_ops
+#define f_offset f_fglob->fg_offset
+#define f_data f_fglob->fg_data
 
 
-struct pshmname {
-       char    *pshm_nameptr;  /* pointer to looked up name */
-       long    pshm_namelen;   /* length of looked up component */
-       u_long  pshm_hash;      /* hash value of looked up name */
-};
+/*
+ * Used to construct the list of memory objects
+ * assigned to a populated shared memory segment.
+ * Each pshm_mobj_t is one element of that singly linked list; the
+ * list head is the pshm_mobjs member of pshm_info_t.
+ */
+typedef struct pshm_mobj {
+       void                  *pshmo_memobject;   /* opaque handle to one memory object */
+       memory_object_size_t  pshmo_size;         /* size associated with that memory object (presumably bytes - confirm) */
+       SLIST_ENTRY(pshm_mobj) pshmo_next;        /* link to the next element of the list */
+} pshm_mobj_t;
 
 
-struct pshmnode {
-       off_t  mapp_addr;
-       size_t  map_size;
-       struct pshminfo *pinfo;
-       unsigned int    pshm_usecount;
-#if DIAGNOSTIC
-       unsigned int readcnt;
-       unsigned int writecnt;
-#endif
-};
-#define PSHMNODE_NULL (struct pshmnode *)0
+/*
+ * This represents an existing POSIX shared memory object.
+ *
+ * It comes into existence with a shm_open(...O_CREAT...)
+ * call and goes away only after it has been shm_unlink()ed
+ * and the last remaining shm_open() file reference is closed.
+ *
+ * To keep track of that lifetime, pshm_usecount is used as a reference
+ * counter. It's incremented for every successful shm_open() and
+ * one extra time for the shm_unlink() to release. Internally
+ * you can temporarily use an additional reference whenever the
+ * subsystem lock has to be dropped for other reasons.
+ *
+ * The pshm_* macros that follow the struct are shorthand for members
+ * of the embedded pshm_hdr, so pinfo->pshm_flags means
+ * pinfo->pshm_hdr.pshm_flags, and so on.
+ */
+typedef struct internal_pshminfo {
+       struct pshminfo pshm_hdr;                          /* header fields (name, flags, usecount, ...); type is declared outside this file, presumably <sys/posix_shm.h> - confirm */
+       SLIST_HEAD(pshm_mobjhead, pshm_mobj) pshm_mobjs;   /* head of the list of pshm_mobj_t memory objects */
+       RB_ENTRY(internal_pshminfo) pshm_links;        /* links for red/black tree */
+} pshm_info_t;
+#define pshm_flags    pshm_hdr.pshm_flags
+#define pshm_usecount pshm_hdr.pshm_usecount
+#define pshm_length   pshm_hdr.pshm_length
+#define pshm_mode     pshm_hdr.pshm_mode
+#define pshm_uid      pshm_hdr.pshm_uid
+#define pshm_gid      pshm_hdr.pshm_gid
+#define pshm_label    pshm_hdr.pshm_label
+
+/* Values for pshm_flags that are still used */
+#define PSHM_ALLOCATED  0x004   /* backing storage is allocated */
+#define PSHM_MAPPED     0x008   /* mapped at least once */
+#define PSHM_INUSE      0x010   /* mapped at least once */
+#define PSHM_REMOVED    0x020   /* no longer in the name cache due to shm_unlink() */
+#define PSHM_ALLOCATING 0x100   /* storage is being allocated */
 
 
+/*
+ * These handle reference counting pshm_info_t structs using pshm_usecount.
+ */
+static int pshm_ref(pshm_info_t *pinfo);
+static void pshm_deref(pshm_info_t *pinfo);
+#define PSHM_MAXCOUNT UINT_MAX
 
 
-#define PSHMHASH(pnp) \
-       (&pshmhashtbl[(pnp)->pshm_hash & pshmhash])
-LIST_HEAD(pshmhashhead, pshmcache) *pshmhashtbl;       /* Hash Table */
-u_long pshmhash;                               /* size of hash table - 1 */
-long   pshmnument;                     /* number of cache entries allocated */
-struct pshmstats pshmstats;            /* cache effectiveness statistics */
+/*
+ * For every shm_open, we get a new one of these; shm_open() stores
+ * it as the f_data of the new file.
+ * The only reason we don't just use pshm_info_t directly is that
+ * proc_pidinfo can be used to query the address at which the
+ * object is mapped, so that address has to be kept somewhere.
+ * Note that even this is a hack. If you mmap() the same fd
+ * multiple times, we only save/report one address.
+ */
+typedef struct pshmnode {
+       off_t       mapp_addr;   /* the single mapped address that is saved/reported */
+       pshm_info_t *pinfo;      /* the shared memory object this open refers to */
+} pshmnode_t;
 
 
-static int pshm_read  __P((struct file *fp, struct uio *uio,
-                   struct ucred *cred, int flags, struct proc *p));
-static int pshm_write  __P((struct file *fp, struct uio *uio,
-                   struct ucred *cred, int flags, struct proc *p));
-static int pshm_ioctl  __P((struct file *fp, u_long com,
-                   caddr_t data, struct proc *p));
-static int pshm_select  __P((struct file *fp, int which, void *wql,
-                   struct proc *p));
-static int pshm_closefile  __P((struct file *fp, struct proc *p));
 
 
-static int pshm_kqfilter __P((struct file *fp, struct knote *kn, struct proc *p));
+/*
+ * Compare function for the red black tree (passed to RB_PROTOTYPE /
+ * RB_GENERATE for pshmhead).
+ *
+ * Entries are ordered by pshm_hdr.pshm_name only; at most
+ * PSHMNAMLEN + 1 bytes of the two names are compared with strncmp().
+ * The strncmp() result is normalized, so the return value is always
+ * exactly -1 (a sorts before b), 1 (a sorts after b) or 0 (same name).
+ */
+static int
+pshm_compare(pshm_info_t *a, pshm_info_t *b)
+{
+       int cmp = strncmp(a->pshm_hdr.pshm_name, b->pshm_hdr.pshm_name, PSHMNAMLEN + 1);
+
+       if (cmp < 0) {
+               return -1;
+       }
+       if (cmp > 0) {
+               return 1;
+       }
+       return 0;
+}
 
 
-struct         fileops pshmops =
-       { pshm_read, pshm_write, pshm_ioctl, pshm_select, pshm_closefile, pshm_kqfilter };
 
 /*
 
 /*
- * Lookup an entry in the cache 
- * 
- * 
- * status of -1 is returned if matches
- * If the lookup determines that the name does not exist
- * (negative cacheing), a status of ENOENT is returned. If the lookup
- * fails, a status of zero is returned.
+ * shared memory "paths" are stored in a red black tree for lookup
  */
  */
+u_long pshmnument;    /* count of entries allocated in the red black tree */
+RB_HEAD(pshmhead, internal_pshminfo) pshm_head;
+RB_PROTOTYPE(pshmhead, internal_pshminfo, pshm_links, pshm_compare)
+RB_GENERATE(pshmhead, internal_pshminfo, pshm_links, pshm_compare)
+
+/* lookup, add, remove functions */
+static pshm_info_t *pshm_cache_search(pshm_info_t * look);
+static void pshm_cache_add(pshm_info_t *entry);
+static void pshm_cache_delete(pshm_info_t *entry);
+
+static int pshm_closefile(struct fileglob *fg, vfs_context_t ctx);
+
+static int pshm_access(pshm_info_t *pinfo, int mode, kauth_cred_t cred, proc_t p);
+int pshm_cache_purge_all(proc_t p);
+
+static int pshm_unlink_internal(pshm_info_t *pinfo);
+
+static const struct fileops pshmops = {   /* installed as f_ops on the file created by shm_open() */
+       .fo_type     = DTYPE_PSXSHM,
+       .fo_read     = fo_no_read,         /* fo_no_*: generic handlers defined elsewhere; presumably they refuse the operation - confirm */
+       .fo_write    = fo_no_write,
+       .fo_ioctl    = fo_no_ioctl,
+       .fo_select   = fo_no_select,
+       .fo_close    = pshm_closefile,     /* the only entry with a pshm-specific handler */
+       .fo_drain    = fo_no_drain,
+       .fo_kqfilter = fo_no_kqfilter,
+};
 
 
-int
-pshm_cache_search(pshmp, pnp, pcache)
-       struct pshminfo **pshmp;
-       struct pshmname *pnp;
-       struct pshmcache **pcache;
-{
-       register struct pshmcache *pcp, *nnp;
-       register struct pshmhashhead *pcpp;
+/*
+ * Everything here is protected by a single mutex.
+ */
+static lck_grp_t       *psx_shm_subsys_lck_grp;
+static lck_grp_attr_t  *psx_shm_subsys_lck_grp_attr;
+static lck_attr_t      *psx_shm_subsys_lck_attr;
+static lck_mtx_t        psx_shm_subsys_mutex;
 
 
-       if (pnp->pshm_namelen > PSHMNAMLEN) {
-               pshmstats.longnames++;
-               return (0);
-       }
+#define PSHM_SUBSYS_LOCK() lck_mtx_lock(& psx_shm_subsys_mutex)
+#define PSHM_SUBSYS_UNLOCK() lck_mtx_unlock(& psx_shm_subsys_mutex)
+#define PSHM_SUBSYS_ASSERT_HELD()  LCK_MTX_ASSERT(&psx_shm_subsys_mutex, LCK_MTX_ASSERT_OWNED)
 
 
-       pcpp = PSHMHASH(pnp);
-       for (pcp = pcpp->lh_first; pcp != 0; pcp = nnp) {
-               nnp = pcp->pshm_hash.le_next;
-               if (pcp->pshm_nlen == pnp->pshm_namelen &&
-                   !bcmp(pcp->pshm_name, pnp->pshm_nameptr,                                            (u_int)pcp-> pshm_nlen))
-                       break;
-       }
 
 
-       if (pcp == 0) {
-               pshmstats.miss++;
-               return (0);
-       }
+__private_extern__ void
+pshm_lock_init( void )
+{
+       psx_shm_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
 
 
-       /* We found a "positive" match, return the vnode */
-        if (pcp->pshminfo) {
-               pshmstats.goodhits++;
-               /* TOUCH(ncp); */
-               *pshmp = pcp->pshminfo;
-               *pcache = pcp;
-               return (-1);
-       }
+       psx_shm_subsys_lck_grp =
+           lck_grp_alloc_init("posix shared memory", psx_shm_subsys_lck_grp_attr);
 
 
-       /*
-        * We found a "negative" match, ENOENT notifies client of this match.
-        * The nc_vpid field records whether this is a whiteout.
-        */
-       pshmstats.neghits++;
-       return (ENOENT);
+       psx_shm_subsys_lck_attr = lck_attr_alloc_init();
+       lck_mtx_init(&psx_shm_subsys_mutex, psx_shm_subsys_lck_grp, psx_shm_subsys_lck_attr);
 }
 
 /*
 }
 
 /*
- * Add an entry to the cache.
+ * Lookup an entry in the cache. Only the name is used from "look".
  */
  */
-int
-pshm_cache_add(pshmp, pnp)
-       struct pshminfo *pshmp;
-       struct pshmname *pnp;
+static pshm_info_t *
+pshm_cache_search(pshm_info_t *look)
 {
 {
-       register struct pshmcache *pcp;
-       register struct pshmhashhead *pcpp;
-       struct pshminfo *dpinfo;
-       struct pshmcache *dpcp;
-
-#if DIAGNOSTIC
-       if (pnp->pshm_namelen > NCHNAMLEN)
-               panic("cache_enter: name too long");
-#endif
+       PSHM_SUBSYS_ASSERT_HELD();
+       return RB_FIND(pshmhead, &pshm_head, look);
+}
 
 
-       /*
-        * We allocate a new entry if we are less than the maximum
-        * allowed and the one at the front of the LRU list is in use.
-        * Otherwise we use the one at the front of the LRU list.
-        */
-       pcp = (struct pshmcache *)_MALLOC(sizeof(struct pshmcache), M_SHM, M_WAITOK);
-       /*  if the entry has already been added by some one else return */
-       if (pshm_cache_search(&dpinfo, pnp, &dpcp) == -1) {
-               _FREE(pcp, M_SHM);
-               return(EEXIST);
+/*
+ * Add a new entry to the cache.
+ *
+ * The subsystem lock must be held (asserted). The entry is inserted
+ * into the red black tree, which is ordered by name via pshm_compare().
+ * Finding an entry with the same name already in the tree is treated
+ * as a fatal error and panics, so callers are expected to have done a
+ * lookup first. On success the entry count pshmnument is incremented.
+ */
+static void
+pshm_cache_add(pshm_info_t *entry)
+{
+       pshm_info_t *conflict;
+
+       PSHM_SUBSYS_ASSERT_HELD();
+       conflict = RB_INSERT(pshmhead, &pshm_head, entry);   /* non-NULL result is handled as a name collision */
+       if (conflict != NULL) {
+               panic("pshm_cache_add() found %p", conflict);
        }
        pshmnument++;
        }
        pshmnument++;
+}
 
 
-       bzero(pcp, sizeof(struct pshmcache));
-       /*
-        * Fill in cache info, if vp is NULL this is a "negative" cache entry.
-        * For negative entries, we have to record whether it is a whiteout.
-        * the whiteout flag is stored in the nc_vpid field which is
-        * otherwise unused.
-        */
-       pcp->pshminfo = pshmp;
-       pcp->pshm_nlen = pnp->pshm_namelen;
-       bcopy(pnp->pshm_nameptr, pcp->pshm_name, (unsigned)pcp->pshm_nlen);
-       pcpp = PSHMHASH(pnp);
-#if DIAGNOSTIC
-       {
-               register struct pshmcache *p;
-
-               for (p = pcpp->lh_first; p != 0; p = p->pshm_hash.le_next)
-                       if (p == pcp)
-                               panic("cache_enter: duplicate");
-       }
-#endif
-       LIST_INSERT_HEAD(pcpp, pcp, pshm_hash);
-       return(0);
+/*
+ * Remove the given entry from the red black tree.
+ *
+ * The subsystem lock must be held, and the entry must not already be
+ * flagged PSHM_REMOVED (both are asserted). The entry count pshmnument
+ * is decremented. This function neither sets PSHM_REMOVED nor frees
+ * the entry; that is left to the caller.
+ */
+static void
+pshm_cache_delete(pshm_info_t *entry)
+{
+       PSHM_SUBSYS_ASSERT_HELD();
+       assert(!(entry->pshm_flags & PSHM_REMOVED));
+       RB_REMOVE(pshmhead, &pshm_head, entry);
+       pshmnument--;
 }
 
 /*
 }
 
 /*
- * Name cache initialization, from vfs_init() when we are booting
+ * Initialize the red black tree.
  */
 void
  */
 void
-pshm_cache_init()
+pshm_cache_init(void)
 {
 {
-       pshmhashtbl = hashinit(desiredvnodes, M_SHM, &pshmhash);
+       RB_INIT(&pshm_head);
 }
 
 /*
 }
 
 /*
- * Invalidate a all entries to particular vnode.
- * 
- * We actually just increment the v_id, that will do it. The entries will
- * be purged by lookup as they get found. If the v_id wraps around, we
- * need to ditch the entire cache, to avoid confusion. No valid vnode will
- * ever have (v_id == 0).
+ * Invalidate all entries and delete all objects associated with them
+ * XXX - due to the reference counting, this only works if all userland
+ * references to it via file descriptors are also closed already. Is this
+ * known to be called after all user processes are killed?
  */
  */
-void
-pshm_cache_purge(void)
+int
+pshm_cache_purge_all(__unused proc_t proc)
 {
 {
-       struct pshmcache *pcp;
-       struct pshmhashhead *pcpp;
+       pshm_info_t *p;
+       pshm_info_t *tmp;
+       int error = 0;
 
 
-       for (pcpp = &pshmhashtbl[pshmhash]; pcpp >= pshmhashtbl; pcpp--) {
-               while (pcp = pcpp->lh_first)
-                       pshm_cache_delete(pcp);
+       if (kauth_cred_issuser(kauth_cred_get()) == 0) {
+               return EPERM;
        }
        }
+
+       PSHM_SUBSYS_LOCK();
+       RB_FOREACH_SAFE(p, pshmhead, &pshm_head, tmp) {
+               error = pshm_unlink_internal(p);
+               if (error) {  /* XXX: why give up on failure, should keep going */
+                       goto out;
+               }
+       }
+       assert(pshmnument == 0);
+
+out:
+       PSHM_SUBSYS_UNLOCK();
+
+       if (error) {
+               printf("%s: Error %d removing posix shm cache: %ld remain!\n",
+                   __func__, error, pshmnument);
+       }
+       return error;
 }
 
 }
 
-pshm_cache_delete(pcp)
-       struct pshmcache *pcp;
+/*
+ * Utility to get the shared memory name from userspace and
+ * populate a pshm_info_t with it. If there's a problem
+ * reading the name or it's malformed, will return an error code.
+ */
+static int
+pshm_get_name(pshm_info_t *pinfo, const user_addr_t user_addr)
 {
 {
-#if DIAGNOSTIC
-       if (pcp->pshm_hash.le_prev == 0)
-               panic("namecache purge le_prev");
-       if (pcp->pshm_hash.le_next == pcp)
-               panic("namecache purge le_next");
-#endif /* DIAGNOSTIC */
-       LIST_REMOVE(pcp, pshm_hash);
-       pcp->pshm_hash.le_prev = 0;     
-       pshmnument--;
-}
+       size_t bytes_copied = 0;
+       int error;
 
 
 
 
-struct shm_open_args {
-       const char *name;
-       int oflag;
-       int mode;
-};
+       error = copyinstr(user_addr, &pinfo->pshm_hdr.pshm_name[0], PSHMNAMLEN + 1, &bytes_copied);
+       if (error != 0) {
+               return error;
+       }
+       assert(bytes_copied <= PSHMNAMLEN + 1);
+       assert(pinfo->pshm_hdr.pshm_name[bytes_copied - 1] == 0);
+       if (bytes_copied < 2) { /* 2: expect at least one character and terminating zero */
+               return EINVAL;
+       }
+       AUDIT_ARG(text, &pinfo->pshm_hdr.pshm_name[0]);
+       return 0;
+}
 
 
+/*
+ * Process a shm_open() system call.
+ */
 int
 int
-shm_open(p, uap, retval)
-       struct proc *p;
-       register struct shm_open_args *uap;
-       register_t *retval;
+shm_open(proc_t p, struct shm_open_args *uap, int32_t *retval)
 {
 {
-       register struct filedesc *fdp = p->p_fd;
-       register struct file *fp;
-       register struct vnode *vp;
-       int  i;
-       struct file *nfp;
-       int type, indx, error;
-       struct pshmname nd;
-       struct pshminfo *pinfo;
-       extern struct fileops pshmops;
-       char * pnbuf;
-       char * nameptr;
-       char * cp;
-       size_t pathlen, plen;
-       int fmode ;
-       int cmode = uap->mode;
-       int incache = 0;
-       struct pshmnode * pnode = PSHMNODE_NULL;
-       struct pshmcache * pcache = PSHMCACHE_NULL;
-       int pinfo_alloc=0;
+       int             indx;
+       int             error = 0;
+       pshm_info_t     *pinfo = NULL;
+       pshm_info_t     *new_pinfo = NULL;
+       pshmnode_t      *new_pnode = NULL;
+       struct fileproc *fp = NULL;
+       int             fmode;
+       int             cmode = uap->mode;
+       bool            incache = false;
+       bool            have_label = false;
 
        AUDIT_ARG(fflags, uap->oflag);
        AUDIT_ARG(mode, uap->mode);
 
        AUDIT_ARG(fflags, uap->oflag);
        AUDIT_ARG(mode, uap->mode);
-       pinfo = PSHMINFO_NULL;
 
 
-       MALLOC_ZONE(pnbuf, caddr_t,
-                       MAXPATHLEN, M_NAMEI, M_WAITOK);
-       pathlen = MAXPATHLEN;
-       error = copyinstr((void *)uap->name, (void *)pnbuf,
-               MAXPATHLEN, &pathlen);
-       if (error) {
-               goto bad;
-       }
-       AUDIT_ARG(text, pnbuf);
-       if (pathlen > PSHMNAMLEN) {
-               error = ENAMETOOLONG;
+       /*
+        * Allocate data structures we need. We parse the userspace name into
+        * a pshm_info_t, even when we don't need to O_CREAT.
+        */
+       MALLOC(new_pinfo, pshm_info_t *, sizeof(pshm_info_t), M_SHM, M_WAITOK | M_ZERO);
+       if (new_pinfo == NULL) {
+               error = ENOSPC;
                goto bad;
        }
 
                goto bad;
        }
 
-
-#ifdef PSXSHM_NAME_RESTRICT
-       nameptr = pnbuf;
-       if (*nameptr == '/') {
-               while (*(nameptr++) == '/') {
-                       plen--;
-                       error = EINVAL;
-                       goto bad;
-               }
-        } else {
-               error = EINVAL;
+       /*
+        * Get and check the name.
+        */
+       error = pshm_get_name(new_pinfo, uap->name);
+       if (error != 0) {
                goto bad;
        }
                goto bad;
        }
-#endif /* PSXSHM_NAME_RESTRICT */
 
 
-       plen = pathlen;
-       nameptr = pnbuf;
-       nd.pshm_nameptr = nameptr;
-       nd.pshm_namelen = plen;
-       nd. pshm_hash =0;
-
-        for (cp = nameptr, i=1; *cp != 0 && i <= plen; i++, cp++) {
-               nd.pshm_hash += (unsigned char)*cp * i;
+       /*
+        * Attempt to allocate a new fp. If unsuccessful, the fp will be
+        * left unmodified (NULL).
+        */
+       error = falloc(p, &fp, &indx, vfs_context_current());
+       if (error) {
+               goto bad;
        }
 
        }
 
-       error = pshm_cache_search(&pinfo, &nd, &pcache);
+       cmode &= ALLPERMS;
 
 
-       if (error == ENOENT) {
-               error = EINVAL;
-               goto bad;
-
-       }
-       if (!error) {
-               incache = 0;
-       } else
-               incache = 1;
        fmode = FFLAGS(uap->oflag);
        fmode = FFLAGS(uap->oflag);
-       if ((fmode & (FREAD | FWRITE))==0) {
+       if ((fmode & (FREAD | FWRITE)) == 0) {
                error = EINVAL;
                goto bad;
        }
 
                error = EINVAL;
                goto bad;
        }
 
-       if (error = falloc(p, &nfp, &indx))
+       /*
+        * Will need a new pnode for the file pointer
+        */
+       MALLOC(new_pnode, pshmnode_t *, sizeof(pshmnode_t), M_SHM, M_WAITOK | M_ZERO);
+       if (new_pnode == NULL) {
+               error = ENOSPC;
                goto bad;
                goto bad;
-       fp = nfp;
-
-       cmode &=  ALLPERMS;
+       }
 
 
+       /*
+        * If creating a new segment, fill in its information.
+        * If we find a pre-existing one in cache lookup we'll just toss this one later.
+        */
        if (fmode & O_CREAT) {
        if (fmode & O_CREAT) {
-               if ((fmode & O_EXCL) && incache) {
-                       AUDIT_ARG(posix_ipc_perm, pinfo->pshm_uid,
-                                 pinfo->pshm_gid, pinfo->pshm_mode);
-
-                       /* shm obj exists and opened O_EXCL */
-#if notyet
-                        if (pinfo->pshm_flags & PSHM_INDELETE) {
-                        }
-#endif 
-                        error = EEXIST;
-                        goto bad1;
-                } 
-                if (!incache) {
-                    /*  create a new one */
-                    pinfo = (struct pshminfo *)_MALLOC(sizeof(struct pshminfo), M_SHM, M_WAITOK);
-                    bzero(pinfo, sizeof(struct pshminfo));
-                       pinfo_alloc = 1;
-                    pinfo->pshm_flags = PSHM_DEFINED | PSHM_INCREATE;
-                    pinfo->pshm_usecount = 1;
-                    pinfo->pshm_mode = cmode;
-                    pinfo->pshm_uid = p->p_ucred->cr_uid;
-                    pinfo->pshm_gid = p->p_ucred->cr_gid;
-                } else {
-                    /*  already exists */
-                        if( pinfo->pshm_flags & PSHM_INDELETE) {
-                            error = ENOENT;
-                            goto bad1;
-                        }      
-                       AUDIT_ARG(posix_ipc_perm, pinfo->pshm_uid,
-                                 pinfo->pshm_gid, pinfo->pshm_mode);
-                        if (error = pshm_access(pinfo, fmode, p->p_ucred, p))
-                            goto bad1;
-                }
+               new_pinfo->pshm_usecount = 2; /* one each for: file pointer, shm_unlink */
+               new_pinfo->pshm_length = 0;
+               new_pinfo->pshm_mode = cmode;
+               new_pinfo->pshm_uid = kauth_getuid();
+               new_pinfo->pshm_gid = kauth_getgid();
+               SLIST_INIT(&new_pinfo->pshm_mobjs);
+#if CONFIG_MACF
+               mac_posixshm_label_init(&new_pinfo->pshm_hdr);
+               have_label = true;
+               error = mac_posixshm_check_create(kauth_cred_get(), new_pinfo->pshm_hdr.pshm_name);
+               if (error) {
+                       goto bad;
+               }
+#endif
+       }
+
+       /*
+        * Look up the named shared memory segment in the cache, possibly adding
+        * it for O_CREAT.
+        */
+       PSHM_SUBSYS_LOCK();
+
+       pinfo = pshm_cache_search(new_pinfo);
+       if (pinfo != NULL) {
+               incache = true;
+
+               /* Get a new reference to go with the file pointer.*/
+               error = pshm_ref(pinfo);
+               if (error) {
+                       pinfo = NULL;      /* so cleanup code doesn't deref */
+                       goto bad_locked;
+               }
+
+               /* can't have pre-existing if O_EXCL */
+               if ((fmode & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) {
+                       error = EEXIST;
+                       goto bad_locked;
+               }
+
+               /* O_TRUNC is only valid while length is not yet set */
+               if ((fmode & O_TRUNC) &&
+                   (pinfo->pshm_flags & (PSHM_ALLOCATING | PSHM_ALLOCATED))) {
+                       error = EINVAL;
+                       goto bad_locked;
+               }
        } else {
        } else {
-               if (!incache) {
-                       /* O_CREAT  is not set and the shm obecj does not exist */
+               incache = false;
+
+               /* if it wasn't found, must have O_CREAT */
+               if (!(fmode & O_CREAT)) {
                        error = ENOENT;
                        error = ENOENT;
-                       goto bad1;
+                       goto bad_locked;
                }
                }
-               if( pinfo->pshm_flags & PSHM_INDELETE) {
-                       error = ENOENT;
-                       goto bad1;
-               }       
-               if (error = pshm_access(pinfo, fmode, p->p_ucred, p))
-                       goto bad1;
-       }
-       if (fmode & O_TRUNC) {
-               error = EINVAL;
-               goto bad2;
+
+               /* Add the new region to the cache. */
+               pinfo = new_pinfo;
+               pshm_cache_add(pinfo);
+               new_pinfo = NULL;       /* so that it doesn't get free'd */
        }
        }
-#if DIAGNOSTIC 
-       if (fmode & FWRITE)
-               pinfo->pshm_writecount++;
-       if (fmode & FREAD)
-               pinfo->pshm_readcount++;
-#endif
-       pnode = (struct pshmnode *)_MALLOC(sizeof(struct pshmnode), M_SHM, M_WAITOK);
-       bzero(pnode, sizeof(struct pshmnode));
 
 
-       if (!incache) {
-               if (error = pshm_cache_add(pinfo, &nd)) {
-               goto bad3;
+       PSHM_SUBSYS_UNLOCK();
+
+       /*
+        * Check we have permission to access any pre-existing segment
+        */
+       if (incache) {
+               if (fmode & O_CREAT) {
+                       AUDIT_ARG(posix_ipc_perm, pinfo->pshm_uid,
+                           pinfo->pshm_gid, pinfo->pshm_mode);
                }
                }
+#if CONFIG_MACF
+               if ((error = mac_posixshm_check_open(kauth_cred_get(), &pinfo->pshm_hdr, fmode))) {
+                       goto bad;
+               }
+#endif
+               if ((error = pshm_access(pinfo, fmode, kauth_cred_get(), p))) {
+                       goto bad;
+               }
+       } else {
+#if CONFIG_MACF
+               mac_posixshm_label_associate(kauth_cred_get(), &pinfo->pshm_hdr, pinfo->pshm_hdr.pshm_name);
+#endif
        }
        }
-       pinfo->pshm_flags &= ~PSHM_INCREATE;
-       pinfo->pshm_usecount++;
-       pnode->pinfo = pinfo;
+
+       proc_fdlock(p);
        fp->f_flag = fmode & FMASK;
        fp->f_flag = fmode & FMASK;
-       fp->f_type = DTYPE_PSXSHM;
        fp->f_ops = &pshmops;
        fp->f_ops = &pshmops;
-       fp->f_data = (caddr_t)pnode;
-       *fdflags(p, indx) &= ~UF_RESERVED;
+       new_pnode->pinfo = pinfo;
+       fp->f_data = (caddr_t)new_pnode;
+       *fdflags(p, indx) |= UF_EXCLOSE;
+       procfdtbl_releasefd(p, indx, NULL);
+       fp_drop(p, indx, fp, 1);
+       proc_fdunlock(p);
+
        *retval = indx;
        *retval = indx;
-       FREE_ZONE(pnbuf, MAXPATHLEN, M_NAMEI);
-       return (0);
-bad3:
-       _FREE(pnode, M_SHM);
-               
-bad2:
-       if (pinfo_alloc)
-               _FREE(pinfo, M_SHM);
-bad1:
-       fdrelse(p, indx);
-       ffree(nfp);
+       error = 0;
+       goto done;
+
+bad_locked:
+       PSHM_SUBSYS_UNLOCK();
 bad:
 bad:
-       FREE_ZONE(pnbuf, MAXPATHLEN, M_NAMEI);
-       return (error);
+       /*
+        * Drop any new reference to a pre-existing shared memory region.
+        */
+       if (incache && pinfo != NULL) {
+               PSHM_SUBSYS_LOCK();
+               pshm_deref(pinfo);
+               PSHM_SUBSYS_UNLOCK();
+       }
+
+       /*
+        * Delete any allocated unused data structures.
+        */
+       if (new_pnode != NULL) {
+               FREE(new_pnode, M_SHM);
+       }
+
+       if (fp != NULL) {
+               fp_free(p, indx, fp);
+       }
+
+done:
+       if (new_pinfo != NULL) {
+#if CONFIG_MACF
+               if (have_label) {
+                       mac_posixshm_label_destroy(&new_pinfo->pshm_hdr);
+               }
+#endif
+               FREE(new_pinfo, M_SHM);
+       }
+       return error;
 }
 
 
 }
 
 
-/* ARGSUSED */
+/*
+ * The truncate call associates memory with a shared memory region. It can
+ * only be successfully done with a non-zero length once per shared memory region.
+ */
 int
 int
-pshm_truncate(p, fp, fd, length, retval)
-       struct proc *p;
-       struct file *fp;
-       int fd;
-       off_t length;
-       register_t *retval;
+pshm_truncate(
+       __unused proc_t       p,
+       struct fileproc       *fp,
+       __unused int          fd,
+       off_t                 length,
+       __unused int32_t      *retval)
 {
 {
-       struct pshminfo * pinfo;
-       struct pshmnode * pnode ;
-       kern_return_t kret;
-       vm_offset_t user_addr;
-       void * mem_object;
-       vm_size_t size;
+       pshm_info_t           *pinfo;
+       pshmnode_t            *pnode;
+       kern_return_t         kret;
+       mem_entry_name_port_t mem_object;
+       mach_vm_size_t        total_size, alloc_size;
+       memory_object_size_t  mosize;
+       pshm_mobj_t           *pshmobj, *pshmobj_last;
+       vm_map_t              user_map;
+       int                   error;
+
+       user_map = current_map();
 
        if (fp->f_type != DTYPE_PSXSHM) {
 
        if (fp->f_type != DTYPE_PSXSHM) {
-               return(EINVAL);
+               return EINVAL;
+       }
+
+#if 0
+       /*
+        * Can't enforce this yet, some third party tools don't
+        * specify O_RDWR like they ought to. See radar 48692182
+        */
+       /* ftruncate() requires write permission */
+       if (!(fp->f_flag & FWRITE)) {
+               return EINVAL;
        }
        }
-       
+#endif
 
 
-       if (((pnode = (struct pshmnode *)fp->f_data)) == PSHMNODE_NULL )
-               return(EINVAL);
+       PSHM_SUBSYS_LOCK();
+       if (((pnode = (pshmnode_t *)fp->f_data)) == NULL) {
+               PSHM_SUBSYS_UNLOCK();
+               return EINVAL;
+       }
 
 
-       if ((pinfo = pnode->pinfo) == PSHMINFO_NULL)
-               return(EINVAL);
-       if ((pinfo->pshm_flags & (PSHM_DEFINED | PSHM_ALLOCATED)) 
-                       != PSHM_DEFINED) {
-               return(EINVAL);
+       if ((pinfo = pnode->pinfo) == NULL) {
+               PSHM_SUBSYS_UNLOCK();
+               return EINVAL;
        }
 
        }
 
-       size = round_page_64(length);
-       kret = vm_allocate(current_map(), &user_addr, size, TRUE);
-       if (kret != KERN_SUCCESS) 
-               goto out;
+       /* We only allow one ftruncate() per lifetime of the shm object. */
+       if (pinfo->pshm_flags & (PSHM_ALLOCATING | PSHM_ALLOCATED)) {
+               PSHM_SUBSYS_UNLOCK();
+               return EINVAL;
+       }
+
+#if CONFIG_MACF
+       error = mac_posixshm_check_truncate(kauth_cred_get(), &pinfo->pshm_hdr, length);
+       if (error) {
+               PSHM_SUBSYS_UNLOCK();
+               return error;
+       }
+#endif
+       /*
+        * Grab an extra reference, so we can drop the lock while allocating and
+        * ensure the objects don't disappear.
+        */
+       error = pshm_ref(pinfo);
+       if (error) {
+               PSHM_SUBSYS_UNLOCK();
+               return error;
+       }
+
+       /* set ALLOCATING, so another truncate can't start */
+       pinfo->pshm_flags |= PSHM_ALLOCATING;
+       total_size = vm_map_round_page(length, vm_map_page_mask(user_map));
+
+       pshmobj_last = NULL;
+       for (alloc_size = 0; alloc_size < total_size; alloc_size += mosize) {
+               PSHM_SUBSYS_UNLOCK();
+
+               /* get a memory object to back some of the shared memory */
+               mosize = MIN(total_size - alloc_size, ANON_MAX_SIZE);
+               kret = mach_make_memory_entry_64(VM_MAP_NULL, &mosize, 0,
+                   MAP_MEM_NAMED_CREATE | VM_PROT_DEFAULT, &mem_object, 0);
+
+               if (kret != KERN_SUCCESS) {
+                       goto out;
+               }
+
+               /* get a list entry to track the memory object */
+               MALLOC(pshmobj, pshm_mobj_t *, sizeof(pshm_mobj_t), M_SHM, M_WAITOK);
+               if (pshmobj == NULL) {
+                       kret = KERN_NO_SPACE;
+                       mach_memory_entry_port_release(mem_object);
+                       mem_object = NULL;
+                       goto out;
+               }
+
+               PSHM_SUBSYS_LOCK();
 
 
-       kret = mach_make_memory_entry (current_map(), &size,
-                       user_addr, VM_PROT_DEFAULT, &mem_object, 0);
+               /* link in the new entry */
+               pshmobj->pshmo_memobject = (void *)mem_object;
+               pshmobj->pshmo_size = mosize;
+               SLIST_NEXT(pshmobj, pshmo_next) = NULL;
 
 
-       if (kret != KERN_SUCCESS) 
-               goto out;
-       
-       vm_deallocate(current_map(), user_addr, size);
+               if (pshmobj_last == NULL) {
+                       SLIST_FIRST(&pinfo->pshm_mobjs) = pshmobj;
+               } else {
+                       SLIST_INSERT_AFTER(pshmobj_last, pshmobj, pshmo_next);
+               }
+               pshmobj_last = pshmobj;
+       }
 
 
-       pinfo->pshm_flags &= ~PSHM_DEFINED;
-       pinfo->pshm_flags = PSHM_ALLOCATED;
-       pinfo->pshm_memobject = mem_object;
-       pinfo->pshm_length = size;
-       return(0);
+       /* all done, change flags to ALLOCATED and return success */
+       pinfo->pshm_flags |= PSHM_ALLOCATED;
+       pinfo->pshm_flags &= ~(PSHM_ALLOCATING);
+       pinfo->pshm_length = total_size;
+       pshm_deref(pinfo);              /* drop the "allocating" reference */
+       PSHM_SUBSYS_UNLOCK();
+       return 0;
 
 out:
 
 out:
+       /* clean up any partially allocated objects */
+       PSHM_SUBSYS_LOCK();
+       while ((pshmobj = SLIST_FIRST(&pinfo->pshm_mobjs)) != NULL) {
+               SLIST_REMOVE_HEAD(&pinfo->pshm_mobjs, pshmo_next);
+               PSHM_SUBSYS_UNLOCK();
+               mach_memory_entry_port_release(pshmobj->pshmo_memobject);
+               FREE(pshmobj, M_SHM);
+               PSHM_SUBSYS_LOCK();
+       }
+       pinfo->pshm_flags &= ~PSHM_ALLOCATING;
+       pshm_deref(pinfo);              /* drop the "allocating" reference */
+       PSHM_SUBSYS_UNLOCK();
+
        switch (kret) {
        case KERN_INVALID_ADDRESS:
        case KERN_NO_SPACE:
        switch (kret) {
        case KERN_INVALID_ADDRESS:
        case KERN_NO_SPACE:
-               return (ENOMEM);
+               return ENOMEM;
        case KERN_PROTECTION_FAILURE:
        case KERN_PROTECTION_FAILURE:
-               return (EACCES);
+               return EACCES;
        default:
        default:
-               return (EINVAL);
-       
+               return EINVAL;
        }
 }
 
 int
        }
 }
 
 int
-pshm_stat(pnode, sb)
-struct pshmnode *pnode;
-struct stat *sb;
+pshm_stat(pshmnode_t *pnode, void *ub, int isstat64)
 {
 {
-       struct pshminfo *pinfo;
-       
-       if ((pinfo = pnode->pinfo) == PSHMINFO_NULL)
-               return(EINVAL);
-
-       bzero(sb, sizeof(struct stat)); 
-       sb->st_mode = pinfo->pshm_mode;
-       sb->st_uid = pinfo->pshm_uid;
-       sb->st_gid = pinfo->pshm_gid;
-       sb->st_size = pinfo->pshm_length;
-
-       return(0);
+       struct stat *sb = (struct stat *)0;     /* warning avoidance ; protected by isstat64 */
+       struct stat64 * sb64 = (struct stat64 *)0;  /* warning avoidance ; protected by isstat64 */
+       pshm_info_t *pinfo;
+#if CONFIG_MACF
+       int error;
+#endif
+
+       PSHM_SUBSYS_LOCK();
+       if ((pinfo = pnode->pinfo) == NULL) {
+               PSHM_SUBSYS_UNLOCK();
+               return EINVAL;
+       }
+
+#if CONFIG_MACF
+       error = mac_posixshm_check_stat(kauth_cred_get(), &pinfo->pshm_hdr);
+       if (error) {
+               PSHM_SUBSYS_UNLOCK();
+               return error;
+       }
+#endif
+
+       if (isstat64 != 0) {
+               sb64 = (struct stat64 *)ub;
+               bzero(sb64, sizeof(struct stat64));
+               sb64->st_mode = pinfo->pshm_mode;
+               sb64->st_uid = pinfo->pshm_uid;
+               sb64->st_gid = pinfo->pshm_gid;
+               sb64->st_size = pinfo->pshm_length;
+       } else {
+               sb = (struct stat *)ub;
+               bzero(sb, sizeof(struct stat));
+               sb->st_mode = pinfo->pshm_mode;
+               sb->st_uid = pinfo->pshm_uid;
+               sb->st_gid = pinfo->pshm_gid;
+               sb->st_size = pinfo->pshm_length;
+       }
+       PSHM_SUBSYS_UNLOCK();
+
+       return 0;
 }
 
 }
 
-int
-pshm_access(struct pshminfo *pinfo, int mode, struct ucred *cred, struct proc *p)
+/*
+ * Verify access to a shared memory region.
+ */
+static int
+pshm_access(pshm_info_t *pinfo, int mode, kauth_cred_t cred, __unused proc_t p)
 {
 {
-       mode_t mask;
-       register gid_t *gp;
-       int i, error;
+       int mode_req = ((mode & FREAD) ? S_IRUSR : 0) |
+           ((mode & FWRITE) ? S_IWUSR : 0);
 
        /* Otherwise, user id 0 always gets access. */
 
        /* Otherwise, user id 0 always gets access. */
-       if (cred->cr_uid == 0)
-               return (0);
-
-       mask = 0;
-
-       /* Otherwise, check the owner. */
-       if (cred->cr_uid == pinfo->pshm_uid) {
-               if (mode & FREAD)
-                       mask |= S_IRUSR;
-               if (mode & FWRITE)
-                       mask |= S_IWUSR;
-               return ((pinfo->pshm_mode & mask) == mask ? 0 : EACCES);
-       }
-
-       /* Otherwise, check the groups. */
-       for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++)
-               if (pinfo->pshm_gid == *gp) {
-                       if (mode & FREAD)
-                               mask |= S_IRGRP;
-                       if (mode & FWRITE)
-                               mask |= S_IWGRP;
-                       return ((pinfo->pshm_mode & mask) == mask ? 0 : EACCES);
-               }
+       if (!suser(cred, NULL)) {
+               return 0;
+       }
 
 
-       /* Otherwise, check everyone else. */
-       if (mode & FREAD)
-               mask |= S_IROTH;
-       if (mode & FWRITE)
-               mask |= S_IWOTH;
-       return ((pinfo->pshm_mode & mask) == mask ? 0 : EACCES);
+       return posix_cred_access(cred, pinfo->pshm_uid, pinfo->pshm_gid, pinfo->pshm_mode, mode_req);
 }
 
 }
 
-struct mmap_args {
-               caddr_t addr;
-               size_t len;
-               int prot;
-               int flags;
-               int fd;
-#ifdef DOUBLE_ALIGN_PARAMS
-               long pad;
-#endif
-               off_t pos;
-};
-
 int
 int
-pshm_mmap(struct proc *p, struct mmap_args *uap, register_t *retval, struct file *fp, vm_size_t pageoff) 
+pshm_mmap(
+       __unused proc_t    p,
+       struct mmap_args   *uap,
+       user_addr_t        *retval,
+       struct fileproc    *fp,
+       off_t              pageoff)
 {
 {
-       vm_offset_t     user_addr = (vm_offset_t)uap->addr;
-       vm_size_t       user_size = (vm_size_t)uap->len ;
-       int prot = uap->prot;
-       int flags = uap->flags;
+       vm_map_offset_t    user_addr = (vm_map_offset_t)uap->addr;
+       vm_map_size_t      user_size = (vm_map_size_t)uap->len;
+       vm_map_offset_t    user_start_addr;
+       vm_map_size_t      map_size, mapped_size;
+       int                prot = uap->prot;
+       int                max_prot = VM_PROT_DEFAULT;
+       int                flags = uap->flags;
        vm_object_offset_t file_pos = (vm_object_offset_t)uap->pos;
        vm_object_offset_t file_pos = (vm_object_offset_t)uap->pos;
-       int fd = uap->fd;
-       vm_map_t        user_map;
-       boolean_t       find_space,docow;
-       kern_return_t   kret;
-       struct pshminfo * pinfo;
-       struct pshmnode * pnode;
-       void * mem_object;
+       vm_object_offset_t map_pos;
+       vm_map_t           user_map;
+       int                alloc_flags;
+       vm_map_kernel_flags_t vmk_flags;
+       bool               docow;
+       kern_return_t      kret = KERN_SUCCESS;
+       pshm_info_t        *pinfo;
+       pshmnode_t         *pnode;
+       pshm_mobj_t        *pshmobj;
+       int                error;
+
+       if (user_size == 0) {
+               return 0;
+       }
 
 
-       if (user_size == 0) 
-               return(0);
+       if (!(flags & MAP_SHARED)) {
+               return EINVAL;
+       }
 
 
-       if ((flags & MAP_SHARED) == 0)
-               return(EINVAL);
+       /* Can't allow write permission if the shm_open() didn't allow them. */
+       if (!(fp->f_flag & FWRITE)) {
+               if (prot & VM_PROT_WRITE) {
+                       return EPERM;
+               }
+               max_prot &= ~VM_PROT_WRITE;
+       }
 
 
+       PSHM_SUBSYS_LOCK();
+       pnode = (pshmnode_t *)fp->f_data;
+       if (pnode == NULL) {
+               PSHM_SUBSYS_UNLOCK();
+               return EINVAL;
+       }
 
 
-       if ((prot & PROT_WRITE) && ((fp->f_flag & FWRITE) == 0)) {
-               return(EPERM);
+       pinfo = pnode->pinfo;
+       if (pinfo == NULL) {
+               PSHM_SUBSYS_UNLOCK();
+               return EINVAL;
        }
 
        }
 
-       if (((pnode = (struct pshmnode *)fp->f_data)) == PSHMNODE_NULL )
-               return(EINVAL);
+       if (!(pinfo->pshm_flags & PSHM_ALLOCATED)) {
+               PSHM_SUBSYS_UNLOCK();
+               return EINVAL;
+       }
 
 
-       if ((pinfo = pnode->pinfo) == PSHMINFO_NULL)
-               return(EINVAL);
+       if (user_size > (vm_map_size_t)pinfo->pshm_length) {
+               PSHM_SUBSYS_UNLOCK();
+               return EINVAL;
+       }
 
 
-       if ((pinfo->pshm_flags & PSHM_ALLOCATED) != PSHM_ALLOCATED) {
-               return(EINVAL);
+       vm_map_size_t end_pos = 0;
+       if (os_add_overflow(user_size, file_pos, &end_pos)) {
+               PSHM_SUBSYS_UNLOCK();
+               return EINVAL;
        }
        }
-       if (user_size > pinfo->pshm_length) {
-               return(EINVAL);
+       if (end_pos > (vm_map_size_t)pinfo->pshm_length) {
+               PSHM_SUBSYS_UNLOCK();
+               return EINVAL;
        }
        }
-       if ((off_t)user_size  + file_pos > pinfo->pshm_length) {
-               return(EINVAL);
+
+       pshmobj = SLIST_FIRST(&pinfo->pshm_mobjs);
+       if (pshmobj == NULL) {
+               PSHM_SUBSYS_UNLOCK();
+               return EINVAL;
+       }
+
+#if CONFIG_MACF
+       error = mac_posixshm_check_mmap(kauth_cred_get(), &pinfo->pshm_hdr, prot, flags);
+       if (error) {
+               PSHM_SUBSYS_UNLOCK();
+               return error;
        }
        }
-       if ((mem_object =  pinfo->pshm_memobject) == NULL) {
-               return(EINVAL);
+#endif
+       /* Grab an extra reference, so we can drop the lock while mapping. */
+       error = pshm_ref(pinfo);
+       if (error) {
+               PSHM_SUBSYS_UNLOCK();
+               return error;
        }
 
        }
 
-       
+       PSHM_SUBSYS_UNLOCK();
        user_map = current_map();
 
        user_map = current_map();
 
-       if ((flags & MAP_FIXED) == 0) {
-               find_space = TRUE;
-               user_addr = round_page_32(user_addr); 
+       if (!(flags & MAP_FIXED)) {
+               alloc_flags = VM_FLAGS_ANYWHERE;
+               user_addr = vm_map_round_page(user_addr,
+                   vm_map_page_mask(user_map));
        } else {
        } else {
-               if (user_addr != trunc_page_32(user_addr))
-                       return (EINVAL);
-               find_space = FALSE;
-               (void) vm_deallocate(user_map, user_addr, user_size);
+               if (user_addr != vm_map_round_page(user_addr,
+                   vm_map_page_mask(user_map))) {
+                       error = EINVAL;
+                       goto out_deref;
+               }
+
+               /*
+                * We do not get rid of the existing mappings here because
+                * it wouldn't be atomic (see comment in mmap()).  We let
+                * Mach VM know that we want it to replace any existing
+                * mapping with the new one.
+                */
+               alloc_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
+       }
+       docow = false;
+
+       mapped_size = 0;
+       vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
+       /* reserve the entire space first... */
+       kret = vm_map_enter_mem_object(user_map,
+           &user_addr,
+           user_size,
+           0,
+           alloc_flags,
+           vmk_flags,
+           VM_KERN_MEMORY_NONE,
+           IPC_PORT_NULL,
+           0,
+           false,
+           VM_PROT_NONE,
+           VM_PROT_NONE,
+           VM_INHERIT_NONE);
+       user_start_addr = user_addr;
+       if (kret != KERN_SUCCESS) {
+               goto out_deref;
        }
        }
-       docow = FALSE;  
 
 
-       kret = vm_map_64(user_map, &user_addr, user_size,
-                       0, find_space, pinfo->pshm_memobject, file_pos, docow,
-                       prot, VM_PROT_DEFAULT, 
-                       VM_INHERIT_DEFAULT);
+       /* Now overwrite with the real mappings. */
+       for (map_pos = 0, pshmobj = SLIST_FIRST(&pinfo->pshm_mobjs);
+           user_size != 0;
+           map_pos += pshmobj->pshmo_size, pshmobj = SLIST_NEXT(pshmobj, pshmo_next)) {
+               if (pshmobj == NULL) {
+                       /* nothing there to map !? */
+                       goto out_deref;
+               }
+               if (file_pos >= map_pos + pshmobj->pshmo_size) {
+                       continue;
+               }
+               map_size = pshmobj->pshmo_size - (file_pos - map_pos);
+               if (map_size > user_size) {
+                       map_size = user_size;
+               }
+               vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
+               kret = vm_map_enter_mem_object(
+                       user_map,
+                       &user_addr,
+                       map_size,
+                       0,
+                       VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
+                       vmk_flags,
+                       VM_KERN_MEMORY_NONE,
+                       pshmobj->pshmo_memobject,
+                       file_pos - map_pos,
+                       docow,
+                       prot,
+                       max_prot,
+                       VM_INHERIT_SHARE);
+               if (kret != KERN_SUCCESS) {
+                       goto out_deref;
+               }
 
 
-       if (kret != KERN_SUCCESS) 
-                       goto out;
-       kret = vm_inherit(user_map, user_addr, user_size,
-                               VM_INHERIT_SHARE);
-       if (kret != KERN_SUCCESS) {
-               (void) vm_deallocate(user_map, user_addr, user_size);
-               goto out;
+               user_addr += map_size;
+               user_size -= map_size;
+               mapped_size += map_size;
+               file_pos += map_size;
        }
        }
-       pnode->mapp_addr = user_addr;
-       pnode->map_size = user_size;
+
+       PSHM_SUBSYS_LOCK();
+       pnode->mapp_addr = user_start_addr;
        pinfo->pshm_flags |= (PSHM_MAPPED | PSHM_INUSE);
        pinfo->pshm_flags |= (PSHM_MAPPED | PSHM_INUSE);
-out:
+       PSHM_SUBSYS_UNLOCK();
+out_deref:
+       PSHM_SUBSYS_LOCK();
+       pshm_deref(pinfo);      /* drop the extra reference we had while mapping. */
+       PSHM_SUBSYS_UNLOCK();
+       if (kret != KERN_SUCCESS) {
+               if (mapped_size != 0) {
+                       (void) mach_vm_deallocate(current_map(),
+                           user_start_addr,
+                           mapped_size);
+               }
+       }
+
        switch (kret) {
        case KERN_SUCCESS:
        switch (kret) {
        case KERN_SUCCESS:
-               *fdflags(p, fd) |= UF_MAPPED;
-               *retval = (register_t)(user_addr + pageoff);
-               return (0);
+               *retval = (user_start_addr + pageoff);
+               return 0;
        case KERN_INVALID_ADDRESS:
        case KERN_NO_SPACE:
        case KERN_INVALID_ADDRESS:
        case KERN_NO_SPACE:
-               return (ENOMEM);
+               return ENOMEM;
        case KERN_PROTECTION_FAILURE:
        case KERN_PROTECTION_FAILURE:
-               return (EACCES);
+               return EACCES;
        default:
        default:
-               return (EINVAL);
+               return EINVAL;
        }
        }
-
 }
 
 }
 
-struct shm_unlink_args {
-       const char *name;
-};
-
-int
-shm_unlink(p, uap, retval)
-       struct proc *p;
-       register struct shm_unlink_args *uap;
-       register_t *retval;
+/*
+ * Remove a shared memory region name from the name lookup cache.
+ */
+static int
+pshm_unlink_internal(pshm_info_t *pinfo)
 {
 {
-       register struct filedesc *fdp = p->p_fd;
-       register struct file *fp;
-       int flags, i;
-       int error=0;
-       struct pshmname nd;
-       struct pshminfo *pinfo;
-       extern struct fileops pshmops;
-       char * pnbuf;
-       char * nameptr;
-       char * cp;
-       size_t pathlen, plen;
-       int fmode, cmode ;
-       int incache = 0;
-       struct pshmnode * pnode = PSHMNODE_NULL;
-       struct pshmcache *pcache = PSHMCACHE_NULL;
-       kern_return_t kret;
-
-       pinfo = PSHMINFO_NULL;
-
-       MALLOC_ZONE(pnbuf, caddr_t,
-                       MAXPATHLEN, M_NAMEI, M_WAITOK);
-       pathlen = MAXPATHLEN;
-       error = copyinstr((void *)uap->name, (void *)pnbuf,
-               MAXPATHLEN, &pathlen);
-       if (error) {
-               goto bad;
-       }
-       AUDIT_ARG(text, pnbuf);
-       if (pathlen > PSHMNAMLEN) {
-               error = ENAMETOOLONG;
-               goto bad;
-       }
+       PSHM_SUBSYS_ASSERT_HELD();
 
 
-
-#ifdef PSXSHM_NAME_RESTRICT
-       nameptr = pnbuf;
-       if (*nameptr == '/') {
-               while (*(nameptr++) == '/') {
-                       plen--;
-                       error = EINVAL;
-                       goto bad;
-               }
-        } else {
-               error = EINVAL;
-               goto bad;
+       if (pinfo == NULL) {
+               return EINVAL;
        }
        }
-#endif /* PSXSHM_NAME_RESTRICT */
 
 
-       plen = pathlen;
-       nameptr = pnbuf;
-       nd.pshm_nameptr = nameptr;
-       nd.pshm_namelen = plen;
-       nd. pshm_hash =0;
+       pshm_cache_delete(pinfo);
+       pinfo->pshm_flags |= PSHM_REMOVED;
 
 
-        for (cp = nameptr, i=1; *cp != 0 && i <= plen; i++, cp++) {
-               nd.pshm_hash += (unsigned char)*cp * i;
-       }
+       /* release the "unlink" reference */
+       pshm_deref(pinfo);
 
 
-       error = pshm_cache_search(&pinfo, &nd, &pcache);
+       return 0;
+}
 
 
-       if (error == ENOENT) {
-               error = EINVAL;
-               goto bad;
+int
+shm_unlink(proc_t p, struct shm_unlink_args *uap, __unused int32_t *retval)
+{
+       int         error = 0;
+       pshm_info_t *pinfo = NULL;
+       pshm_info_t *name_pinfo = NULL;
 
 
+       /*
+        * Get the name from user args.
+        */
+       MALLOC(name_pinfo, pshm_info_t *, sizeof(pshm_info_t), M_SHM, M_WAITOK | M_ZERO);
+       if (name_pinfo == NULL) {
+               error = ENOSPC;
+               goto bad;
        }
        }
-       if (!error) {
+       error = pshm_get_name(name_pinfo, uap->name);
+       if (error != 0) {
                error = EINVAL;
                goto bad;
                error = EINVAL;
                goto bad;
-       } else
-               incache = 1;
+       }
+
+       PSHM_SUBSYS_LOCK();
+       pinfo = pshm_cache_search(name_pinfo);
 
 
-       if ((pinfo->pshm_flags & (PSHM_DEFINED | PSHM_ALLOCATED))==0) {
-               return (EINVAL);
+       if (pinfo == NULL) {
+               error = ENOENT;
+               goto bad_unlock;
        }
 
        }
 
-       if (pinfo->pshm_flags & PSHM_INDELETE) {
-               error = 0;
-               goto bad;
+#if CONFIG_MACF
+       error = mac_posixshm_check_unlink(kauth_cred_get(), &pinfo->pshm_hdr, name_pinfo->pshm_hdr.pshm_name);
+       if (error) {
+               goto bad_unlock;
        }
        }
+#endif
 
 
-       if (pinfo->pshm_memobject == NULL) {
-               error = EINVAL;
-               goto bad;
+       AUDIT_ARG(posix_ipc_perm, pinfo->pshm_uid, pinfo->pshm_gid, pinfo->pshm_mode);
+
+       /*
+        * Following file semantics, unlink should normally be allowed
+        * for users with write permission only. We also allow the creator
+        * of a segment to be able to delete, even w/o write permission.
+        * That's because there's no equivalent of write permission for the
+        * directory containing a file.
+        */
+       error = pshm_access(pinfo, FWRITE, kauth_cred_get(), p);
+       if (error != 0 && pinfo->pshm_uid != kauth_getuid()) {
+               goto bad_unlock;
        }
 
        }
 
-       AUDIT_ARG(posix_ipc_perm, pinfo->pshm_uid, pinfo->pshm_gid,
-                 pinfo->pshm_mode);
-       pinfo->pshm_flags |= PSHM_INDELETE;
-       pinfo->pshm_usecount--;
-       kret = mach_destroy_memory_entry(pinfo->pshm_memobject);
-       pshm_cache_delete(pcache);
-       _FREE(pcache, M_SHM);
-       pinfo->pshm_flags |= PSHM_REMOVED;
-       error = 0;
+       error = pshm_unlink_internal(pinfo);
+bad_unlock:
+       PSHM_SUBSYS_UNLOCK();
 bad:
 bad:
-       FREE_ZONE(pnbuf, MAXPATHLEN, M_NAMEI);
-       return (error);
-out:
-       switch (kret) {
-       case KERN_INVALID_ADDRESS:
-       case KERN_PROTECTION_FAILURE:
-               return (EACCES);
-       default:
-               return (EINVAL);
+       if (name_pinfo != NULL) {
+               FREE(name_pinfo, M_SHM);
        }
        }
+       return error;
 }
 
 }
 
-int
-pshm_close(pnode, flags, cred, p)
-       register struct pshmnode *pnode;
-       int flags;
-       struct ucred *cred;
-       struct proc *p;
+/*
+ * Add a new reference to a shared memory region.
+ * Fails with EMFILE if the reference counter would overflow.
+ */
+static int
+pshm_ref(pshm_info_t *pinfo)
 {
 {
-       int error=0;
-       kern_return_t kret;
-       register struct pshminfo *pinfo;
-
-       if ((pinfo = pnode->pinfo) == PSHMINFO_NULL)
-               return(EINVAL);
+       PSHM_SUBSYS_ASSERT_HELD();
 
 
-       if ((pinfo->pshm_flags & PSHM_ALLOCATED) != PSHM_ALLOCATED) {
-               return(EINVAL);
+       if (pinfo->pshm_usecount == PSHM_MAXCOUNT) {
+               return EMFILE;
        }
        }
-#if DIAGNOSTIC
-       if(!pinfo->pshm_usecount) {
-               kprintf("negative usecount in pshm_close\n");
+       pinfo->pshm_usecount++;
+       return 0;
+}
+
+/*
+ * Drop a reference to a pshm_info_t. Delete the region if
+ * this was the final reference.
+ */
+static void
+pshm_deref(pshm_info_t *pinfo)
+{
+       pshm_mobj_t *pshmobj;
+
+       PSHM_SUBSYS_ASSERT_HELD();
+       if (pinfo->pshm_usecount == 0) {
+               panic("negative usecount in pshm_close\n");
        }
        }
-#endif /* DIAGNOSTIC */
-       pinfo->pshm_usecount--;
+       pinfo->pshm_usecount--; /* release this fd's reference */
 
 
-       if ((pinfo->pshm_flags & PSHM_REMOVED) && !pinfo->pshm_usecount) {
-               _FREE(pinfo,M_SHM);
+       if (pinfo->pshm_usecount == 0) {
+#if CONFIG_MACF
+               mac_posixshm_label_destroy(&pinfo->pshm_hdr);
+#endif
+               PSHM_SUBSYS_UNLOCK();
+
+               /*
+                * Release references to any backing objects.
+                */
+               while ((pshmobj = SLIST_FIRST(&pinfo->pshm_mobjs)) != NULL) {
+                       SLIST_REMOVE_HEAD(&pinfo->pshm_mobjs, pshmo_next);
+                       mach_memory_entry_port_release(pshmobj->pshmo_memobject);
+                       FREE(pshmobj, M_SHM);
+               }
+
+               /* free the pinfo itself */
+               FREE(pinfo, M_SHM);
+
+               PSHM_SUBSYS_LOCK();
        }
        }
-       _FREE(pnode, M_SHM);
-       return (error);
 }
 
 }
 
+/* vfs_context_t passed to match prototype for struct fileops */
 static int
 static int
-pshm_closefile(fp, p)
-       struct file *fp;
-       struct proc *p;
+pshm_closefile(struct fileglob *fg, __unused vfs_context_t ctx)
 {
 {
-       return (pshm_close(((struct pshmnode *)fp->f_data), fp->f_flag,
-               fp->f_cred, p));
-}
+       int        error = EINVAL;
+       pshmnode_t *pnode;
 
 
-static int
-pshm_read(fp, uio, cred, flags, p)
-       struct file *fp;
-       struct uio *uio;
-       struct ucred *cred;
-       int flags;
-       struct proc *p;
-{
-       return(EOPNOTSUPP);
-}
+       PSHM_SUBSYS_LOCK();
 
 
-static int
-pshm_write(fp, uio, cred, flags, p)
-       struct file *fp;
-       struct uio *uio;
-       struct ucred *cred;
-       int flags;
-       struct proc *p;
-{
-       return(EOPNOTSUPP);
-}
+       pnode = (pshmnode_t *)fg->fg_data;
+       if (pnode != NULL) {
+               error = 0;
+               fg->fg_data = NULL; /* set fg_data to NULL to avoid racing close()es */
+               if (pnode->pinfo != NULL) {
+                       pshm_deref(pnode->pinfo);
+                       pnode->pinfo = NULL;
+               }
+       }
 
 
-static int
-pshm_ioctl(fp, com, data, p)
-       struct file *fp;
-       u_long com;
-       caddr_t data;
-       struct proc *p;
-{
-       return(EOPNOTSUPP);
+       PSHM_SUBSYS_UNLOCK();
+       if (pnode != NULL) {
+               FREE(pnode, M_SHM);
+       }
+
+       return error;
 }
 
 }
 
-static int
-pshm_select(fp, which, wql, p)
-       struct file *fp;
-       int which;
-       void *wql;
-       struct proc *p;
+int
+fill_pshminfo(pshmnode_t * pshm, struct pshm_info * info)
 {
 {
-       return(EOPNOTSUPP);
+       pshm_info_t *pinfo;
+       struct vinfo_stat *sb;
+
+       PSHM_SUBSYS_LOCK();
+       if ((pinfo = pshm->pinfo) == NULL) {
+               PSHM_SUBSYS_UNLOCK();
+               return EINVAL;
+       }
+
+       sb = &info->pshm_stat;
+
+       bzero(sb, sizeof(struct vinfo_stat));
+       sb->vst_mode = pinfo->pshm_mode;
+       sb->vst_uid = pinfo->pshm_uid;
+       sb->vst_gid = pinfo->pshm_gid;
+       sb->vst_size = pinfo->pshm_length;
+
+       info->pshm_mappaddr = pshm->mapp_addr;
+       bcopy(&pinfo->pshm_hdr.pshm_name[0], &info->pshm_name[0], PSHMNAMLEN + 1);
+
+       PSHM_SUBSYS_UNLOCK();
+       return 0;
 }
 
 }
 
-static int
-pshm_kqfilter(fp, kn, p)
-       struct file *fp;
-       struct knote *kn;
-       struct proc *p;
+#if CONFIG_MACF
+void
+pshm_label_associate(struct fileproc *fp, struct vnode *vp, vfs_context_t ctx)
 {
 {
-       return(EOPNOTSUPP);
+       pshmnode_t *pnode;
+       pshm_info_t *pshm;
+
+       PSHM_SUBSYS_LOCK();
+       pnode = (pshmnode_t *)fp->f_data;
+       if (pnode != NULL) {
+               pshm = pnode->pinfo;
+               if (pshm != NULL) {
+                       mac_posixshm_vnode_label_associate(
+                               vfs_context_ucred(ctx), &pshm->pshm_hdr, pshm->pshm_label,
+                               vp, vp->v_label);
+               }
+       }
+       PSHM_SUBSYS_UNLOCK();
 }
 }
+#endif