git.saurik.com Git - apple/xnu.git/blobdiff - bsd/kern/sysv_shm.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / kern / sysv_shm.c
index c626909e086ce78618a876250d8d1546957975f9..a3a6ea9330897713f2c53339de19b197bd42fc34 100644 (file)
@@ -1,23 +1,29 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
- * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
- * 
- * @APPLE_LICENSE_HEADER_END@
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*     $NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $      */
 
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
+/*
+ * NOTICE: This file was modified by McAfee Research in 2004 to introduce
+ * support for mandatory and extensible security protections.  This notice
+ * is included in support of clause 2.2 (b) of the Apple Public License,
+ * Version 2.0.
+ * Copyright (c) 2005-2006 SPARTA, Inc.
+ */
 
 
 #include <sys/appleapiopts.h>
 #include <sys/ipcs.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
+#if CONFIG_MACF
+#include <security/mac_framework.h>
+#endif
 
-#include <bsm/audit_kernel.h>
+#include <security/audit/audit.h>
 
 #include <mach/mach_types.h>
 #include <mach/vm_inherit.h>
 #include <mach/mach_vm.h>
 
 #include <vm/vm_map.h>
-#include <vm/vm_shared_memory_server.h>
 #include <vm/vm_protos.h>
+#include <vm/vm_kern.h>
 
 #include <kern/locks.h>
+#include <os/overflow.h>
+
+/* Uncomment the next line to see MAC debugging output (note: MPRINTF below is gated by CONFIG_MACF_DEBUG, not MAC_DEBUG). */
+/* #define MAC_DEBUG */
+#if CONFIG_MACF_DEBUG
+#define MPRINTF(a)      printf a
+#else
+#define MPRINTF(a)
+#endif
 
-static void shminit(void *);
-#if 0
-SYSINIT(sysv_shm, SI_SUB_SYSV_SHM, SI_ORDER_FIRST, shminit, NULL)
-#endif 0
+#if SYSV_SHM
+static int shminit(void);
 
-static lck_grp_t       *sysv_shm_subsys_lck_grp;
-static lck_grp_attr_t  *sysv_shm_subsys_lck_grp_attr;
-static lck_attr_t      *sysv_shm_subsys_lck_attr;
-static lck_mtx_t        sysv_shm_subsys_mutex;
+static LCK_GRP_DECLARE(sysv_shm_subsys_lck_grp, "sysv_shm_subsys_lock");
+static LCK_MTX_DECLARE(sysv_shm_subsys_mutex, &sysv_shm_subsys_lck_grp);
 
 #define SYSV_SHM_SUBSYS_LOCK() lck_mtx_lock(&sysv_shm_subsys_mutex)
 #define SYSV_SHM_SUBSYS_UNLOCK() lck_mtx_unlock(&sysv_shm_subsys_mutex)
@@ -97,57 +118,75 @@ static lck_mtx_t        sysv_shm_subsys_mutex;
 static int oshmctl(void *p, void *uap, void *retval);
 static int shmget_allocate_segment(struct proc *p, struct shmget_args *uap, int mode, int * retval);
 static int shmget_existing(struct shmget_args *uap, int mode, int segnum, int  * retval);
-static void shmid_ds_64to32(struct user_shmid_ds *in, struct shmid_ds *out);
-static void shmid_ds_32to64(struct shmid_ds *in, struct user_shmid_ds *out);
+static void shmid_ds_64to32(struct user_shmid_ds *in, struct user32_shmid_ds *out);
+static void shmid_ds_32to64(struct user32_shmid_ds *in, struct user_shmid_ds *out);
 
 /* XXX casting to (sy_call_t *) is bogus, as usual. */
-static sy_call_t *shmcalls[] = {
+static sy_call_t* const shmcalls[] = {
        (sy_call_t *)shmat, (sy_call_t *)oshmctl,
        (sy_call_t *)shmdt, (sy_call_t *)shmget,
        (sy_call_t *)shmctl
 };
 
-#define        SHMSEG_FREE             0x0200
-#define        SHMSEG_REMOVED          0x0400
-#define        SHMSEG_ALLOCATED        0x0800
-#define        SHMSEG_WANTED           0x1000
+#define SHMSEG_FREE             0x0200
+#define SHMSEG_REMOVED          0x0400
+#define SHMSEG_ALLOCATED        0x0800
+#define SHMSEG_WANTED           0x1000
 
 static int shm_last_free, shm_nused, shm_committed;
-struct user_shmid_ds   *shmsegs;       /* 64 bit version */
+struct shmid_kernel     *shmsegs;       /* segment table, indexed by segment number; each entry wraps the 64 bit user_shmid_ds as .u */
 static int shm_inited = 0;
 
+/*
+ * Since anonymous memory chunks are limited to ANON_MAX_SIZE bytes,
+ * we have to keep a list of chunks when we want to handle a shared memory
+ * segment bigger than ANON_MAX_SIZE.
+ * Each chunk points to a VM named entry of up to ANON_MAX_SIZE bytes
+ * of anonymous memory.
+ */
 struct shm_handle {
-       void * shm_object;              /* vm_offset_t kva; */
+       void * shm_object;                      /* named entry for this chunk */
+       memory_object_size_t shm_handle_size;   /* size of this chunk, in bytes */
+       struct shm_handle *shm_handle_next;     /* next chunk, NULL at end of list */
 };
 
 struct shmmap_state {
-       mach_vm_address_t va;           /* user address */
-       int shmid;                      /* segment id */
+       mach_vm_address_t va;           /* user address */
+       int shmid;                      /* segment id */
 };
 
-static void shm_deallocate_segment(struct user_shmid_ds *);
+static void shm_deallocate_segment(struct shmid_kernel *);
 static int shm_find_segment_by_key(key_t);
-static struct user_shmid_ds *shm_find_segment_by_shmid(int);
+static struct shmid_kernel *shm_find_segment_by_shmid(int);
 static int shm_delete_mapping(struct proc *, struct shmmap_state *, int);
 
 #ifdef __APPLE_API_PRIVATE
-struct  shminfo shminfo = {
-        -1,    /* SHMMAX 4096 *1024 */
-        -1,    /* SHMMIN = 1 */
-        -1,    /* SHMMNI = 1 */
-        -1,    /* SHMSEG = 8 */
-        -1     /* SHMALL = 1024 */
+#define DEFAULT_SHMMAX  (4 * 1024 * 1024)
+#define DEFAULT_SHMMIN  1
+#define DEFAULT_SHMMNI  32
+#define DEFAULT_SHMSEG  8
+#define DEFAULT_SHMALL  1024
+
+struct shminfo shminfo = {
+       .shmmax = DEFAULT_SHMMAX,
+       .shmmin = DEFAULT_SHMMIN,
+       .shmmni = DEFAULT_SHMMNI,
+       .shmseg = DEFAULT_SHMSEG,
+       .shmall = DEFAULT_SHMALL
 };
-#endif /* __APPLE_API_PRIVATE */
 
-void sysv_shm_lock_init(void);
+#define SHMID_IS_VALID(x) ((x) >= 0)
+#define SHMID_UNALLOCATED (-1)
+#define SHMID_SENTINEL    (-2)
+
+#endif /* __APPLE_API_PRIVATE */
 
 static __inline__ time_t
 sysv_shmtime(void)
 {
-       struct timeval  tv;
+       struct timeval  tv;
        microtime(&tv);
-       return (tv.tv_sec);
+       return tv.tv_sec;
 }
 
 /*
@@ -157,17 +196,17 @@ sysv_shmtime(void)
  * NOTE: Source and target may *NOT* overlap! (target is smaller)
  */
 static void
-shmid_ds_64to32(struct user_shmid_ds *in, struct shmid_ds *out)
+shmid_ds_64to32(struct user_shmid_ds *in, struct user32_shmid_ds *out)
 {
        out->shm_perm = in->shm_perm;
-       out->shm_segsz = (size_t)in->shm_segsz;
+       out->shm_segsz = in->shm_segsz;
        out->shm_lpid = in->shm_lpid;
        out->shm_cpid = in->shm_cpid;
        out->shm_nattch = in->shm_nattch;
        out->shm_atime = in->shm_atime;
        out->shm_dtime = in->shm_dtime;
        out->shm_ctime = in->shm_ctime;
-       out->shm_internal = CAST_DOWN(void *,in->shm_internal);
+       out->shm_internal = CAST_DOWN_EXPLICIT(int, in->shm_internal);
 }
 
 /*
@@ -176,16 +215,16 @@ shmid_ds_64to32(struct user_shmid_ds *in, struct shmid_ds *out)
  * the beginning.
  */
 static void
-shmid_ds_32to64(struct shmid_ds *in, struct user_shmid_ds *out)
+shmid_ds_32to64(struct user32_shmid_ds *in, struct user_shmid_ds *out)
 {
-       out->shm_internal = CAST_USER_ADDR_T(in->shm_internal);
+       out->shm_internal = in->shm_internal;
        out->shm_ctime = in->shm_ctime;
        out->shm_dtime = in->shm_dtime;
        out->shm_atime = in->shm_atime;
        out->shm_nattch = in->shm_nattch;
        out->shm_cpid = in->shm_cpid;
        out->shm_lpid = in->shm_lpid;
-       out->shm_segsz = (user_size_t)in->shm_segsz;
+       out->shm_segsz = in->shm_segsz;
        out->shm_perm = in->shm_perm;
 }
 
@@ -195,67 +234,81 @@ shm_find_segment_by_key(key_t key)
 {
        int i;
 
-       for (i = 0; i < shminfo.shmmni; i++)
-               if ((shmsegs[i].shm_perm.mode & SHMSEG_ALLOCATED) &&
-                   shmsegs[i].shm_perm.key == key)
+       for (i = 0; i < shminfo.shmmni; i++) {
+               if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) &&
+                   shmsegs[i].u.shm_perm._key == key) {
                        return i;
+               }
+       }
        return -1;
 }
 
-static struct user_shmid_ds *
+static struct shmid_kernel *
 shm_find_segment_by_shmid(int shmid)
 {
        int segnum;
-       struct user_shmid_ds *shmseg;
+       struct shmid_kernel *shmseg;
 
        segnum = IPCID_TO_IX(shmid);
-       if (segnum < 0 || segnum >= shminfo.shmmni)
+       if (segnum < 0 || segnum >= shminfo.shmmni) { /* index falls outside the segment table */
                return NULL;
+       }
        shmseg = &shmsegs[segnum];
-       if ((shmseg->shm_perm.mode & (SHMSEG_ALLOCATED | SHMSEG_REMOVED))
+       if ((shmseg->u.shm_perm.mode & (SHMSEG_ALLOCATED | SHMSEG_REMOVED)) /* slot must be allocated and not marked removed */
            != SHMSEG_ALLOCATED ||
-           shmseg->shm_perm.seq != IPCID_TO_SEQ(shmid))
+           shmseg->u.shm_perm._seq != IPCID_TO_SEQ(shmid)) { /* sequence number in the id must match the slot's */
                return NULL;
+       }
        return shmseg;
 }
 
 static void
-shm_deallocate_segment(struct user_shmid_ds *shmseg)
+shm_deallocate_segment(struct shmid_kernel *shmseg)
 {
-       struct shm_handle *shm_handle;
+       struct shm_handle *shm_handle, *shm_handle_next;
        mach_vm_size_t size;
 
-       shm_handle = CAST_DOWN(void *,shmseg->shm_internal);    /* tunnel */
-       size = mach_vm_round_page(shmseg->shm_segsz);
-       mach_memory_entry_port_release(shm_handle->shm_object);
-       shm_handle->shm_object = NULL;
-       FREE((caddr_t)shm_handle, M_SHM);
-       shmseg->shm_internal = USER_ADDR_NULL;          /* tunnel */
+       for (shm_handle = CAST_DOWN(void *, shmseg->u.shm_internal); /* tunnel */
+           shm_handle != NULL;
+           shm_handle = shm_handle_next) {
+               shm_handle_next = shm_handle->shm_handle_next; /* read the link before this node is freed below */
+               mach_memory_entry_port_release(shm_handle->shm_object);
+               kheap_free(KM_SHM, shm_handle, sizeof(struct shm_handle));
+       }
+       shmseg->u.shm_internal = USER_ADDR_NULL;                /* tunnel */
+       size = vm_map_round_page(shmseg->u.shm_segsz,
+           vm_map_page_mask(current_map())); /* NOTE(review): rounds with the calling map's page size; confirm it matches the rounding used when shm_committed was incremented */
        shm_committed -= btoc(size);
        shm_nused--;
-       shmseg->shm_perm.mode = SHMSEG_FREE;
+       shmseg->u.shm_perm.mode = SHMSEG_FREE; /* plain assignment: drops every other mode bit, including SHMSEG_ALLOCATED and SHMSEG_REMOVED */
+#if CONFIG_MACF
+       /* Reset the MAC label */
+       mac_sysvshm_label_recycle(shmseg);
+#endif
 }
 
 static int
 shm_delete_mapping(__unused struct proc *p, struct shmmap_state *shmmap_s,
-       int deallocate)
+    int deallocate)
 {
-       struct user_shmid_ds *shmseg;
+       struct shmid_kernel *shmseg;
        int segnum, result;
        mach_vm_size_t size;
 
        segnum = IPCID_TO_IX(shmmap_s->shmid);
        shmseg = &shmsegs[segnum];
-       size = mach_vm_round_page(shmseg->shm_segsz);   /* XXX done for us? */
+       size = vm_map_round_page(shmseg->u.shm_segsz,
+           vm_map_page_mask(current_map())); /* XXX done for us? */
        if (deallocate) {
-       result = mach_vm_deallocate(current_map(), shmmap_s->va, size);
-       if (result != KERN_SUCCESS)
-               return EINVAL;
+               result = mach_vm_deallocate(current_map(), shmmap_s->va, size);
+               if (result != KERN_SUCCESS) {
+                       return EINVAL;
+               }
        }
-       shmmap_s->shmid = -1;
-       shmseg->shm_dtime = sysv_shmtime();
-       if ((--shmseg->shm_nattch <= 0) &&
-           (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
+       shmmap_s->shmid = SHMID_UNALLOCATED;
+       shmseg->u.shm_dtime = sysv_shmtime();
+       if ((--shmseg->u.shm_nattch <= 0) &&
+           (shmseg->u.shm_perm.mode & SHMSEG_REMOVED)) {
                shm_deallocate_segment(shmseg);
                shm_last_free = segnum;
        }
@@ -263,39 +316,57 @@ shm_delete_mapping(__unused struct proc *p, struct shmmap_state *shmmap_s,
 }
 
 int
-shmdt(struct proc *p, struct shmdt_args *uap, register_t *retval)
+shmdt(struct proc *p, struct shmdt_args *uap, int32_t *retval)
 {
+#if CONFIG_MACF
+       struct shmid_kernel *shmsegptr;
+#endif
        struct shmmap_state *shmmap_s;
        int i;
        int shmdtret = 0;
 
-       // LP64todo - fix this
-       AUDIT_ARG(svipc_addr, CAST_DOWN(void *,uap->shmaddr));
+       AUDIT_ARG(svipc_addr, uap->shmaddr);
 
        SYSV_SHM_SUBSYS_LOCK();
 
-       if (!shm_inited) {
-               shmdtret = EINVAL;
+       if ((shmdtret = shminit())) {
                goto shmdt_out;
        }
+
        shmmap_s = (struct shmmap_state *)p->vm_shm;
-       if (shmmap_s == NULL) {
+       if (shmmap_s == NULL) {
                shmdtret = EINVAL;
                goto shmdt_out;
        }
 
-       for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
-               if (shmmap_s->shmid != -1 &&
-                   shmmap_s->va == (mach_vm_offset_t)uap->shmaddr)
+       for (; shmmap_s->shmid != SHMID_SENTINEL; shmmap_s++) {
+               if (SHMID_IS_VALID(shmmap_s->shmid) &&
+                   shmmap_s->va == (mach_vm_offset_t)uap->shmaddr) {
                        break;
-       if (i == shminfo.shmseg) {
+               }
+       }
+
+       if (!SHMID_IS_VALID(shmmap_s->shmid)) {
                shmdtret = EINVAL;
                goto shmdt_out;
        }
+
+#if CONFIG_MACF
+       /*
+        * XXX: It might be useful to move this into the shm_delete_mapping
+        * function
+        */
+       shmsegptr = &shmsegs[IPCID_TO_IX(shmmap_s->shmid)];
+       shmdtret = mac_sysvshm_check_shmdt(kauth_cred_get(), shmsegptr);
+       if (shmdtret) {
+               goto shmdt_out;
+       }
+#endif
        i = shm_delete_mapping(p, shmmap_s, 1);
 
-       if (i == 0)
+       if (i == 0) {
                *retval = 0;
+       }
        shmdtret = i;
 shmdt_out:
        SYSV_SHM_SUBSYS_UNLOCK();
@@ -303,122 +374,207 @@ shmdt_out:
 }
 
 int
-shmat(struct proc *p, struct shmat_args *uap, register_t *retval)
+shmat(struct proc *p, struct shmat_args *uap, user_addr_t *retval)
 {
        int error, i, flags;
-       struct user_shmid_ds    *shmseg;
-       struct shmmap_state     *shmmap_s = NULL;
-       struct shm_handle       *shm_handle;
-       mach_vm_address_t       attach_va;      /* attach address in/out */
-       mach_vm_size_t          map_size;       /* size of map entry */
-       vm_prot_t               prot;
-       size_t                  size;
-       kern_return_t           rv;
-       int shmat_ret = 0;
+       struct shmid_kernel     *shmseg;
+       struct shmmap_state     *shmmap_s = NULL;
+       struct shm_handle       *shm_handle;
+       mach_vm_address_t       attach_va;      /* attach address in/out */
+       mach_vm_address_t       shmlba;
+       mach_vm_size_t          map_size;       /* size of map entry */
+       mach_vm_size_t          mapped_size;
+       vm_prot_t           prot;
+       size_t              size;
+       kern_return_t           rv;
+       int                     shmat_ret;
+       int                     vm_flags;
+
+       shmat_ret = 0;
 
        AUDIT_ARG(svipc_id, uap->shmid);
-       // LP64todo - fix this
-       AUDIT_ARG(svipc_addr, CAST_DOWN(void *,uap->shmaddr));
+       AUDIT_ARG(svipc_addr, uap->shmaddr);
 
        SYSV_SHM_SUBSYS_LOCK();
 
-       if (!shm_inited) {
-               shmat_ret = EINVAL;
+       if ((shmat_ret = shminit())) {
                goto shmat_out;
        }
 
        shmmap_s = (struct shmmap_state *)p->vm_shm;
-
        if (shmmap_s == NULL) {
-               size = shminfo.shmseg * sizeof(struct shmmap_state);
-               MALLOC(shmmap_s, struct shmmap_state *, size, M_SHM, M_WAITOK);
+               /* lazily allocate the shm map */
+
+               int nsegs = shminfo.shmseg;
+               if (nsegs <= 0) {
+                       shmat_ret = EMFILE;
+                       goto shmat_out;
+               }
+
+               /* +1 for the sentinel */
+               if (os_add_and_mul_overflow(nsegs, 1, sizeof(struct shmmap_state), &size)) {
+                       shmat_ret = ENOMEM;
+                       goto shmat_out;
+               }
+
+               shmmap_s = kheap_alloc(KM_SHM, size, Z_WAITOK);
                if (shmmap_s == NULL) {
                        shmat_ret = ENOMEM;
                        goto shmat_out;
                }
-               for (i = 0; i < shminfo.shmseg; i++)
-                       shmmap_s[i].shmid = -1;
+
+               /* initialize the entries */
+               for (i = 0; i < nsegs; i++) {
+                       shmmap_s[i].shmid = SHMID_UNALLOCATED;
+               }
+               shmmap_s[i].shmid = SHMID_SENTINEL;
+
                p->vm_shm = (caddr_t)shmmap_s;
        }
+
        shmseg = shm_find_segment_by_shmid(uap->shmid);
        if (shmseg == NULL) {
                shmat_ret = EINVAL;
                goto shmat_out;
        }
 
-       AUDIT_ARG(svipc_perm, &shmseg->shm_perm);
-       error = ipcperm(kauth_cred_get(), &shmseg->shm_perm,
-           (uap->shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
+       AUDIT_ARG(svipc_perm, &shmseg->u.shm_perm);
+       error = ipcperm(kauth_cred_get(), &shmseg->u.shm_perm,
+           (uap->shmflg & SHM_RDONLY) ? IPC_R : IPC_R | IPC_W);
        if (error) {
                shmat_ret = error;
                goto shmat_out;
        }
 
-       for (i = 0; i < shminfo.shmseg; i++) {
-               if (shmmap_s->shmid == -1)
-                       break;
+#if CONFIG_MACF
+       error = mac_sysvshm_check_shmat(kauth_cred_get(), shmseg, uap->shmflg);
+       if (error) {
+               shmat_ret = error;
+               goto shmat_out;
+       }
+#endif
+
+       /* find a free shmmap_state slot (stops at an unallocated entry or the sentinel) */
+       while (SHMID_IS_VALID(shmmap_s->shmid)) {
                shmmap_s++;
        }
-       if (i >= shminfo.shmseg) {
+       if (shmmap_s->shmid != SHMID_UNALLOCATED) {
+               /* no free shmids */
                shmat_ret = EMFILE;
                goto shmat_out;
        }
 
-       map_size = mach_vm_round_page(shmseg->shm_segsz);
+       map_size = vm_map_round_page(shmseg->u.shm_segsz,
+           vm_map_page_mask(current_map()));
        prot = VM_PROT_READ;
-       if ((uap->shmflg & SHM_RDONLY) == 0)
+       if ((uap->shmflg & SHM_RDONLY) == 0) {
                prot |= VM_PROT_WRITE;
+       }
        flags = MAP_ANON | MAP_SHARED;
-       if (uap->shmaddr)
+       if (uap->shmaddr) {
                flags |= MAP_FIXED;
+       }
 
        attach_va = (mach_vm_address_t)uap->shmaddr;
-       if (uap->shmflg & SHM_RND)
-               attach_va &= ~(SHMLBA-1);
-       else if ((attach_va & (SHMLBA-1)) != 0) {
+       shmlba = vm_map_page_size(current_map()); /* XXX instead of SHMLBA */
+       if (uap->shmflg & SHM_RND) {
+               attach_va &= ~(shmlba - 1);
+       } else if ((attach_va & (shmlba - 1)) != 0) {
                shmat_ret = EINVAL;
                goto shmat_out;
        }
 
-       shm_handle = CAST_DOWN(void *, shmseg->shm_internal);   /* tunnel */
+       if (flags & MAP_FIXED) {
+               vm_flags = VM_FLAGS_FIXED;
+       } else {
+               vm_flags = VM_FLAGS_ANYWHERE;
+       }
 
-       rv = mach_vm_map(current_map(),                 /* process map */
-                       &attach_va,                     /* attach address */
-                       map_size,                       /* segment size */
-                       (mach_vm_offset_t)0,            /* alignment mask */
-               (flags & MAP_FIXED)? VM_FLAGS_FIXED: VM_FLAGS_ANYWHERE,
+       mapped_size = 0;
+
+       /* first reserve enough space... */
+       rv = mach_vm_map_kernel(current_map(),
+           &attach_va,
+           map_size,
+           0,
+           vm_flags,
+           VM_MAP_KERNEL_FLAGS_NONE,
+           VM_KERN_MEMORY_NONE,
+           IPC_PORT_NULL,
+           0,
+           FALSE,
+           VM_PROT_NONE,
+           VM_PROT_NONE,
+           VM_INHERIT_NONE);
+       if (rv != KERN_SUCCESS) {
+               goto out;
+       }
+
+       shmmap_s->va = attach_va;
+
+       /* ... then map the shared memory over the reserved space */
+       for (shm_handle = CAST_DOWN(void *, shmseg->u.shm_internal);/* tunnel */
+           shm_handle != NULL;
+           shm_handle = shm_handle->shm_handle_next) {
+               vm_map_size_t chunk_size;
+
+               assert(mapped_size < map_size);
+               chunk_size = shm_handle->shm_handle_size;
+               if (chunk_size > map_size - mapped_size) {
+                       /*
+                        * Partial mapping of last chunk due to
+                        * page size mismatch.
+                        */
+                       assert(vm_map_page_shift(current_map()) < PAGE_SHIFT);
+                       assert(shm_handle->shm_handle_next == NULL);
+                       chunk_size = map_size - mapped_size;
+               }
+               rv = vm_map_enter_mem_object(
+                       current_map(),          /* process map */
+                       &attach_va,             /* attach address */
+                       chunk_size,             /* size to map */
+                       (mach_vm_offset_t)0,    /* alignment mask */
+                       VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
+                       VM_MAP_KERNEL_FLAGS_NONE,
+                       VM_KERN_MEMORY_NONE,
                        shm_handle->shm_object,
                        (mach_vm_offset_t)0,
                        FALSE,
                        prot,
                        prot,
-                       VM_INHERIT_DEFAULT);
-       if (rv != KERN_SUCCESS) 
+                       VM_INHERIT_SHARE);
+               if (rv != KERN_SUCCESS) {
                        goto out;
+               }
 
-       rv = mach_vm_inherit(current_map(), attach_va, map_size, VM_INHERIT_SHARE);
-       if (rv != KERN_SUCCESS) {
-               (void)mach_vm_deallocate(current_map(), attach_va, map_size);
-               goto out;
+               mapped_size += chunk_size;
+               attach_va = attach_va + chunk_size;
        }
 
-       shmmap_s->va = attach_va;
        shmmap_s->shmid = uap->shmid;
-       shmseg->shm_lpid = p->p_pid;
-       shmseg->shm_atime = sysv_shmtime();
-       shmseg->shm_nattch++;
-       *retval = attach_va;    /* XXX return -1 on error */
+       shmseg->u.shm_lpid = p->p_pid;
+       shmseg->u.shm_atime = sysv_shmtime();
+       shmseg->u.shm_nattch++;
+       *retval = shmmap_s->va; /* XXX return -1 on error */
        shmat_ret = 0;
        goto shmat_out;
 out:
+       if (mapped_size > 0) {
+               (void) mach_vm_deallocate(current_map(),
+                   shmmap_s->va,
+                   mapped_size);
+       }
        switch (rv) {
        case KERN_INVALID_ADDRESS:
        case KERN_NO_SPACE:
                shmat_ret = ENOMEM;
+               break;
        case KERN_PROTECTION_FAILURE:
                shmat_ret = EACCES;
+               break;
        default:
                shmat_ret = EINVAL;
+               break;
        }
 shmat_out:
        SYSV_SHM_SUBSYS_UNLOCK();
@@ -431,14 +587,21 @@ oshmctl(__unused void *p, __unused void *uap, __unused void *retval)
        return EINVAL;
 }
 
+/*
+ * Returns:    0                       Success
+ *             EINVAL
+ *     copyout:EFAULT
+ *     copyin:EFAULT
+ *     ipcperm:EPERM
+ *     ipcperm:EACCES
+ */
 int
-shmctl(__unused struct proc *p, struct shmctl_args *uap, register_t *retval)
+shmctl(__unused struct proc *p, struct shmctl_args *uap, int32_t *retval)
 {
        int error;
        kauth_cred_t cred = kauth_cred_get();
        struct user_shmid_ds inbuf;
-       struct user_shmid_ds *shmseg;
-       size_t shmid_ds_sz = sizeof(struct user_shmid_ds);
+       struct shmid_kernel *shmseg;
 
        int shmctl_ret = 0;
 
@@ -447,39 +610,52 @@ shmctl(__unused struct proc *p, struct shmctl_args *uap, register_t *retval)
 
        SYSV_SHM_SUBSYS_LOCK();
 
-       if (!shm_inited) {
-               shmctl_ret = EINVAL;
+       if ((shmctl_ret = shminit())) {
                goto shmctl_out;
        }
 
-       if (!IS_64BIT_PROCESS(p))
-               shmid_ds_sz = sizeof(struct shmid_ds);
-
        shmseg = shm_find_segment_by_shmid(uap->shmid);
        if (shmseg == NULL) {
                shmctl_ret = EINVAL;
                goto shmctl_out;
        }
 
-       /* XXAUDIT: This is the perms BEFORE any change by this call. This 
+       /* XXAUDIT: This is the perms BEFORE any change by this call. This
         * may not be what is desired.
         */
-       AUDIT_ARG(svipc_perm, &shmseg->shm_perm);
+       AUDIT_ARG(svipc_perm, &shmseg->u.shm_perm);
 
+#if CONFIG_MACF
+       error = mac_sysvshm_check_shmctl(cred, shmseg, uap->cmd);
+       if (error) {
+               shmctl_ret = error;
+               goto shmctl_out;
+       }
+#endif
        switch (uap->cmd) {
        case IPC_STAT:
-               error = ipcperm(cred, &shmseg->shm_perm, IPC_R);
+               error = ipcperm(cred, &shmseg->u.shm_perm, IPC_R);
                if (error) {
                        shmctl_ret = error;
                        goto shmctl_out;
                }
 
                if (IS_64BIT_PROCESS(p)) {
-                       error = copyout(shmseg, uap->buf, sizeof(struct user_shmid_ds));
+                       struct user_shmid_ds shmid_ds = {};
+                       memcpy(&shmid_ds, &shmseg->u, sizeof(struct user_shmid_ds));
+
+                       /* Clear kernel reserved pointer before copying to user space */
+                       shmid_ds.shm_internal = USER_ADDR_NULL;
+
+                       error = copyout(&shmid_ds, uap->buf, sizeof(shmid_ds));
                } else {
-                       struct shmid_ds shmid_ds32;
-                       shmid_ds_64to32(shmseg, &shmid_ds32);
-                       error = copyout(&shmid_ds32, uap->buf, sizeof(struct shmid_ds));
+                       struct user32_shmid_ds shmid_ds32 = {};
+                       shmid_ds_64to32(&shmseg->u, &shmid_ds32);
+
+                       /* Clear kernel reserved pointer before copying to user space */
+                       shmid_ds32.shm_internal = (user32_addr_t)0;
+
+                       error = copyout(&shmid_ds32, uap->buf, sizeof(shmid_ds32));
                }
                if (error) {
                        shmctl_ret = error;
@@ -487,7 +663,7 @@ shmctl(__unused struct proc *p, struct shmctl_args *uap, register_t *retval)
                }
                break;
        case IPC_SET:
-               error = ipcperm(cred, &shmseg->shm_perm, IPC_M);
+               error = ipcperm(cred, &shmseg->u.shm_perm, IPC_M);
                if (error) {
                        shmctl_ret = error;
                        goto shmctl_out;
@@ -495,30 +671,31 @@ shmctl(__unused struct proc *p, struct shmctl_args *uap, register_t *retval)
                if (IS_64BIT_PROCESS(p)) {
                        error = copyin(uap->buf, &inbuf, sizeof(struct user_shmid_ds));
                } else {
-                       error = copyin(uap->buf, &inbuf, sizeof(struct shmid_ds));
+                       struct user32_shmid_ds shmid_ds32;
+                       error = copyin(uap->buf, &shmid_ds32, sizeof(shmid_ds32));
                        /* convert in place; ugly, but safe */
-                       shmid_ds_32to64((struct shmid_ds *)&inbuf, &inbuf);
+                       shmid_ds_32to64(&shmid_ds32, &inbuf);
                }
                if (error) {
                        shmctl_ret = error;
                        goto shmctl_out;
                }
-               shmseg->shm_perm.uid = inbuf.shm_perm.uid;
-               shmseg->shm_perm.gid = inbuf.shm_perm.gid;
-               shmseg->shm_perm.mode =
-                   (shmseg->shm_perm.mode & ~ACCESSPERMS) |
+               shmseg->u.shm_perm.uid = inbuf.shm_perm.uid;
+               shmseg->u.shm_perm.gid = inbuf.shm_perm.gid;
+               shmseg->u.shm_perm.mode =
+                   (shmseg->u.shm_perm.mode & ~ACCESSPERMS) |
                    (inbuf.shm_perm.mode & ACCESSPERMS);
-               shmseg->shm_ctime = sysv_shmtime();
+               shmseg->u.shm_ctime = sysv_shmtime();
                break;
        case IPC_RMID:
-               error = ipcperm(cred, &shmseg->shm_perm, IPC_M);
+               error = ipcperm(cred, &shmseg->u.shm_perm, IPC_M);
                if (error) {
                        shmctl_ret = error;
                        goto shmctl_out;
                }
-               shmseg->shm_perm.key = IPC_PRIVATE;
-               shmseg->shm_perm.mode |= SHMSEG_REMOVED;
-               if (shmseg->shm_nattch <= 0) {
+               shmseg->u.shm_perm._key = IPC_PRIVATE;
+               shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
+               if (shmseg->u.shm_nattch <= 0) {
                        shm_deallocate_segment(shmseg);
                        shm_last_free = IPCID_TO_IX(uap->shmid);
                }
@@ -541,154 +718,214 @@ shmctl_out:
 static int
 shmget_existing(struct shmget_args *uap, int mode, int segnum, int *retval)
 {
-       struct user_shmid_ds *shmseg;
-       int error;
+       struct shmid_kernel *shmseg;
+       int error = 0;
 
        shmseg = &shmsegs[segnum];
-       if (shmseg->shm_perm.mode & SHMSEG_REMOVED) {
+       if (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) {
                /*
                 * This segment is in the process of being allocated.  Wait
                 * until it's done, and look the key up again (in case the
                 * allocation failed or it was freed).
                 */
-               shmseg->shm_perm.mode |= SHMSEG_WANTED;
+               shmseg->u.shm_perm.mode |= SHMSEG_WANTED;
                error = tsleep((caddr_t)shmseg, PLOCK | PCATCH, "shmget", 0);
-               if (error)
+               if (error) {
                        return error;
+               }
                return EAGAIN;
        }
-       error = ipcperm(kauth_cred_get(), &shmseg->shm_perm, mode);
-       if (error)
+
+       /*
+        * The low 9 bits of shmflag are the mode bits being requested, which
+        * are the actual mode bits desired on the segment, and not in IPC_R
+        * form; therefore it would be incorrect to call ipcperm() to validate
+        * them; instead, we AND the existing mode with the requested mode, and
+        * verify that it matches the requested mode; otherwise, we fail with
+        * EACCES (access denied).
+        */
+       if ((shmseg->u.shm_perm.mode & mode) != mode) {
+               return EACCES;
+       }
+
+#if CONFIG_MACF
+       error = mac_sysvshm_check_shmget(kauth_cred_get(), shmseg, uap->shmflg);
+       if (error) {
                return error;
-       if (uap->size && uap->size > shmseg->shm_segsz)
+       }
+#endif
+
+       if (uap->size && uap->size > shmseg->u.shm_segsz) {
                return EINVAL;
-       if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
+       }
+
+       if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL)) {
                return EEXIST;
-       *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
+       }
+
+       *retval = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
        return 0;
 }
 
 static int
 shmget_allocate_segment(struct proc *p, struct shmget_args *uap, int mode,
-       int *retval)
+    int *retval)
 {
-       int i, segnum, shmid, size;
+       int i, segnum, shmid;
        kauth_cred_t cred = kauth_cred_get();
-       struct user_shmid_ds *shmseg;
+       struct shmid_kernel *shmseg;
        struct shm_handle *shm_handle;
        kern_return_t kret;
-       vm_offset_t user_addr;
+       mach_vm_size_t total_size, size, alloc_size;
        void * mem_object;
+       struct shm_handle *shm_handle_next, **shm_handle_next_p;
 
-       if (uap->size < (user_size_t)shminfo.shmmin ||
-           uap->size > (user_size_t)shminfo.shmmax)
+       if (uap->size <= 0 ||
+           uap->size < (user_size_t)shminfo.shmmin ||
+           uap->size > (user_size_t)shminfo.shmmax) {
                return EINVAL;
-       if (shm_nused >= shminfo.shmmni) /* any shmids left? */
+       }
+       if (shm_nused >= shminfo.shmmni) { /* any shmids left? */
                return ENOSPC;
-       size = mach_vm_round_page(uap->size);
-       if (shm_committed + btoc(size) > shminfo.shmall)
+       }
+       if (mach_vm_round_page_overflow(uap->size, &total_size)) {
+               return EINVAL;
+       }
+       if ((user_ssize_t)(shm_committed + btoc(total_size)) > shminfo.shmall) {
                return ENOMEM;
+       }
        if (shm_last_free < 0) {
-               for (i = 0; i < shminfo.shmmni; i++)
-                       if (shmsegs[i].shm_perm.mode & SHMSEG_FREE)
+               for (i = 0; i < shminfo.shmmni; i++) {
+                       if (shmsegs[i].u.shm_perm.mode & SHMSEG_FREE) {
                                break;
-               if (i == shminfo.shmmni)
+                       }
+               }
+               if (i == shminfo.shmmni) {
                        panic("shmseg free count inconsistent");
+               }
                segnum = i;
-       } else  {
+       } else {
                segnum = shm_last_free;
                shm_last_free = -1;
        }
        shmseg = &shmsegs[segnum];
+
        /*
         * In case we sleep in malloc(), mark the segment present but deleted
         * so that noone else tries to create the same key.
+        * XXX but we don't release the global lock !?
         */
-       kret = vm_allocate(current_map(), &user_addr, size, VM_FLAGS_ANYWHERE);
-       if (kret != KERN_SUCCESS) 
-               goto out;
-
-       kret = mach_make_memory_entry (current_map(), &size, user_addr, 
-               VM_PROT_DEFAULT, (mem_entry_name_port_t *)&mem_object, 0);
-
-       if (kret != KERN_SUCCESS) 
-               goto out;
-
-       vm_deallocate(current_map(), user_addr, size);
+       shmseg->u.shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
+       shmseg->u.shm_perm._key = uap->key;
+       shmseg->u.shm_perm._seq = (shmseg->u.shm_perm._seq + 1) & 0x7fff;
+
+       shm_handle_next_p = NULL;
+       for (alloc_size = 0;
+           alloc_size < total_size;
+           alloc_size += size) {
+               size = MIN(total_size - alloc_size, ANON_MAX_SIZE);
+               kret = mach_make_memory_entry_64(
+                       VM_MAP_NULL,
+                       (memory_object_size_t *) &size,
+                       (memory_object_offset_t) 0,
+                       MAP_MEM_NAMED_CREATE | VM_PROT_DEFAULT,
+                       (ipc_port_t *) &mem_object, 0);
+               if (kret != KERN_SUCCESS) {
+                       goto out;
+               }
 
-       shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
-       shmseg->shm_perm.key = uap->key;
-       shmseg->shm_perm.seq = (shmseg->shm_perm.seq + 1) & 0x7fff;
-       MALLOC(shm_handle, struct shm_handle *, sizeof(struct shm_handle), M_SHM, M_WAITOK);
-       if (shm_handle == NULL) {
-               kret = KERN_NO_SPACE;
-               mach_memory_entry_port_release(mem_object);
-               mem_object = NULL;
-               goto out;
+               shm_handle = kheap_alloc(KM_SHM, sizeof(struct shm_handle), Z_WAITOK);
+               if (shm_handle == NULL) {
+                       kret = KERN_NO_SPACE;
+                       mach_memory_entry_port_release(mem_object);
+                       mem_object = NULL;
+                       goto out;
+               }
+               shm_handle->shm_object = mem_object;
+               shm_handle->shm_handle_size = size;
+               shm_handle->shm_handle_next = NULL;
+               if (shm_handle_next_p == NULL) {
+                       shmseg->u.shm_internal = CAST_USER_ADDR_T(shm_handle);/* tunnel */
+               } else {
+                       *shm_handle_next_p = shm_handle;
+               }
+               shm_handle_next_p = &shm_handle->shm_handle_next;
        }
-       shm_handle->shm_object = mem_object;
-       shmid = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
 
-       shmseg->shm_internal = CAST_USER_ADDR_T(shm_handle);    /* tunnel */
-       shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_getuid(cred);
-       shmseg->shm_perm.cgid = shmseg->shm_perm.gid = cred->cr_gid;
-       shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
+       shmid = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
+
+       shmseg->u.shm_perm.cuid = shmseg->u.shm_perm.uid = kauth_cred_getuid(cred);
+       shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = kauth_cred_getgid(cred);
+       shmseg->u.shm_perm.mode = (shmseg->u.shm_perm.mode & SHMSEG_WANTED) |
            (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
-       shmseg->shm_segsz = uap->size;
-       shmseg->shm_cpid = p->p_pid;
-       shmseg->shm_lpid = shmseg->shm_nattch = 0;
-       shmseg->shm_atime = shmseg->shm_dtime = 0;
-       shmseg->shm_ctime = sysv_shmtime();
+       shmseg->u.shm_segsz = uap->size;
+       shmseg->u.shm_cpid = p->p_pid;
+       shmseg->u.shm_lpid = shmseg->u.shm_nattch = 0;
+       shmseg->u.shm_atime = shmseg->u.shm_dtime = 0;
+#if CONFIG_MACF
+       mac_sysvshm_label_associate(cred, shmseg);
+#endif
+       shmseg->u.shm_ctime = sysv_shmtime();
        shm_committed += btoc(size);
        shm_nused++;
-       AUDIT_ARG(svipc_perm, &shmseg->shm_perm);
-       if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
+       AUDIT_ARG(svipc_perm, &shmseg->u.shm_perm);
+       if (shmseg->u.shm_perm.mode & SHMSEG_WANTED) {
                /*
                 * Somebody else wanted this key while we were asleep.  Wake
                 * them up now.
                 */
-               shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
+               shmseg->u.shm_perm.mode &= ~SHMSEG_WANTED;
                wakeup((caddr_t)shmseg);
        }
        *retval = shmid;
        AUDIT_ARG(svipc_id, shmid);
        return 0;
-out: 
+out:
+       if (kret != KERN_SUCCESS) {
+               for (shm_handle = CAST_DOWN(void *, shmseg->u.shm_internal); /* tunnel */
+                   shm_handle != NULL;
+                   shm_handle = shm_handle_next) {
+                       shm_handle_next = shm_handle->shm_handle_next;
+                       mach_memory_entry_port_release(shm_handle->shm_object);
+                       kheap_free(KM_SHM, shm_handle, sizeof(struct shm_handle));
+               }
+               shmseg->u.shm_internal = USER_ADDR_NULL; /* tunnel */
+       }
+
        switch (kret) {
        case KERN_INVALID_ADDRESS:
        case KERN_NO_SPACE:
-               return (ENOMEM);
+               return ENOMEM;
        case KERN_PROTECTION_FAILURE:
-               return (EACCES);
+               return EACCES;
        default:
-               return (EINVAL);
+               return EINVAL;
        }
-
 }
 
 int
-shmget(struct proc *p, struct shmget_args *uap, register_t *retval)
+shmget(struct proc *p, struct shmget_args *uap, int32_t *retval)
 {
        int segnum, mode, error;
        int shmget_ret = 0;
-       
+
        /* Auditing is actually done in shmget_allocate_segment() */
 
        SYSV_SHM_SUBSYS_LOCK();
 
-       if (!shm_inited) {
-               shmget_ret = EINVAL;
+       if ((shmget_ret = shminit())) {
                goto shmget_out;
        }
 
        mode = uap->shmflg & ACCESSPERMS;
        if (uap->key != IPC_PRIVATE) {
-       again:
+again:
                segnum = shm_find_segment_by_key(uap->key);
                if (segnum >= 0) {
                        error = shmget_existing(uap, mode, segnum, retval);
-                       if (error == EAGAIN)
+                       if (error == EAGAIN) {
                                goto again;
+                       }
                        shmget_ret = error;
                        goto shmget_out;
                }
@@ -701,20 +938,38 @@ shmget(struct proc *p, struct shmget_args *uap, register_t *retval)
 shmget_out:
        SYSV_SHM_SUBSYS_UNLOCK();
        return shmget_ret;
-       /*NOTREACHED*/
-
 }
 
-/* XXX actually varargs. */
+/*
+ * shmsys
+ *
+ * Entry point for all SHM calls: shmat, oshmctl, shmdt, shmget, shmctl
+ *
+ * Parameters: p       Process requesting the call
+ *              uap    User argument descriptor (see below)
+ *              retval Return value of the selected shm call
+ *
+ * Indirect parameters:        uap->which      msg call to invoke (index in array of shm calls)
+ *                      uap->a2                User argument descriptor
+ *
+ * Returns:    0       Success
+ *              !0     Not success
+ *
+ * Implicit returns: retval     Return value of the selected shm call
+ *
+ * DEPRECATED:  This interface should not be used to call the other SHM
+ *              functions (shmat, oshmctl, shmdt, shmget, shmctl). The correct
+ *              usage is to call the other SHM functions directly.
+ */
 int
-shmsys(struct proc *p, struct shmsys_args *uap, register_t *retval)
+shmsys(struct proc *p, struct shmsys_args *uap, int32_t *retval)
 {
-
        /* The routine that we are dispatching already does this */
 
-       if (uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0]))
+       if (uap->which >= sizeof(shmcalls) / sizeof(shmcalls[0])) {
                return EINVAL;
-       return ((*shmcalls[uap->which])(p, &uap->a2, retval));
+       }
+       return (*shmcalls[uap->which])(p, &uap->a2, retval);
 }
 
 /*
@@ -725,51 +980,81 @@ shmfork(struct proc *p1, struct proc *p2)
 {
        struct shmmap_state *shmmap_s;
        size_t size;
-       int i;
-       int shmfork_ret = 0;
+       int nsegs = 0;
+       int ret = 0;
 
        SYSV_SHM_SUBSYS_LOCK();
 
-       if (!shm_inited) {
-               shmfork_ret = 0;
+       if (shminit()) {
+               ret = 1;
+               goto shmfork_out;
+       }
+
+       struct shmmap_state *src = (struct shmmap_state *)p1->vm_shm;
+       assert(src);
+
+       /* count number of shmid entries in src */
+       for (struct shmmap_state *s = src; s->shmid != SHMID_SENTINEL; s++) {
+               nsegs++;
+       }
+
+       if (os_add_and_mul_overflow(nsegs, 1, sizeof(struct shmmap_state), &size)) {
+               ret = 1;
                goto shmfork_out;
        }
-               
-       size = shminfo.shmseg * sizeof(struct shmmap_state);
-       MALLOC(shmmap_s, struct shmmap_state *, size, M_SHM, M_WAITOK);
-       if (shmmap_s != NULL) {
-               bcopy((caddr_t)p1->vm_shm, (caddr_t)shmmap_s, size);
-               p2->vm_shm = (caddr_t)shmmap_s;
-               for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
-                       if (shmmap_s->shmid != -1)
-                               shmsegs[IPCID_TO_IX(shmmap_s->shmid)].shm_nattch++;
-               shmfork_ret = 0;
+       shmmap_s = kheap_alloc(KM_SHM, size, Z_WAITOK);
+       if (shmmap_s == NULL) {
+               ret = 1;
                goto shmfork_out;
        }
 
-       shmfork_ret = 1;        /* failed to copy to child - ENOMEM */
+       bcopy(src, (caddr_t)shmmap_s, size);
+       p2->vm_shm = (caddr_t)shmmap_s;
+       for (; shmmap_s->shmid != SHMID_SENTINEL; shmmap_s++) {
+               if (SHMID_IS_VALID(shmmap_s->shmid)) {
+                       shmsegs[IPCID_TO_IX(shmmap_s->shmid)].u.shm_nattch++;
+               }
+       }
+
 shmfork_out:
        SYSV_SHM_SUBSYS_UNLOCK();
-       return shmfork_ret;
+       return ret;
 }
 
-void
-shmexit(struct proc *p)
+static void
+shmcleanup(struct proc *p, int deallocate)
 {
        struct shmmap_state *shmmap_s;
-       int i;
+       size_t size = 0;
+       int nsegs = 0;
+
+       SYSV_SHM_SUBSYS_LOCK();
 
        shmmap_s = (struct shmmap_state *)p->vm_shm;
+       for (; shmmap_s->shmid != SHMID_SENTINEL; shmmap_s++) {
+               nsegs++;
+               if (SHMID_IS_VALID(shmmap_s->shmid)) {
+                       /*
+                        * XXX: Should the MAC framework enforce
+                        * check here as well.
+                        */
+                       shm_delete_mapping(p, shmmap_s, deallocate);
+               }
+       }
 
-       SYSV_SHM_SUBSYS_LOCK();
-       for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
-               if (shmmap_s->shmid != -1)
-                       shm_delete_mapping(p, shmmap_s, 1);
-       FREE((caddr_t)p->vm_shm, M_SHM);
-       p->vm_shm = NULL;
+       if (os_add_and_mul_overflow(nsegs, 1, sizeof(struct shmmap_state), &size)) {
+               panic("shmcleanup: p->vm_shm buffer was correupted\n");
+       }
+       kheap_free(KM_SHM, p->vm_shm, size);
        SYSV_SHM_SUBSYS_UNLOCK();
 }
 
+void
+shmexit(struct proc *p)
+{
+       shmcleanup(p, 1);
+}
+
 /*
  * shmexec() is like shmexit(), only it doesn't delete the mappings,
  * since the old address space has already been destroyed and the new
@@ -779,24 +1064,14 @@ shmexit(struct proc *p)
 __private_extern__ void
 shmexec(struct proc *p)
 {
-       struct shmmap_state *shmmap_s;
-       int i;
-
-       shmmap_s = (struct shmmap_state *)p->vm_shm;
-       SYSV_SHM_SUBSYS_LOCK();
-       for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
-               if (shmmap_s->shmid != -1)
-                       shm_delete_mapping(p, shmmap_s, 0);
-       FREE((caddr_t)p->vm_shm, M_SHM);
-       p->vm_shm = NULL;
-       SYSV_SHM_SUBSYS_UNLOCK();
+       shmcleanup(p, 0);
 }
 
-void
-shminit(__unused void *dummy)
+int
+shminit(void)
 {
+       size_t sz;
        int i;
-       int s;
 
        if (!shm_inited) {
                /*
@@ -806,75 +1081,100 @@ shminit(__unused void *dummy)
                 * dictates this filed be a size_t, which is 64 bits when
                 * running 64 bit binaries.
                 */
-               s = sizeof(struct user_shmid_ds) * shminfo.shmmni;
+               if (os_mul_overflow(shminfo.shmmni, sizeof(struct shmid_kernel), &sz)) {
+                       return ENOMEM;
+               }
 
-               MALLOC(shmsegs, struct user_shmid_ds *, s, M_SHM, M_WAITOK);
+               shmsegs = zalloc_permanent(sz, ZALIGN_PTR);
                if (shmsegs == NULL) {
-                       /* XXX fail safely: leave shared memory uninited */
-                       return;
+                       return ENOMEM;
                }
                for (i = 0; i < shminfo.shmmni; i++) {
-                       shmsegs[i].shm_perm.mode = SHMSEG_FREE;
-                       shmsegs[i].shm_perm.seq = 0;
+                       shmsegs[i].u.shm_perm.mode = SHMSEG_FREE;
+                       shmsegs[i].u.shm_perm._seq = 0;
+#if CONFIG_MACF
+                       mac_sysvshm_label_init(&shmsegs[i]);
+#endif
                }
                shm_last_free = 0;
                shm_nused = 0;
                shm_committed = 0;
                shm_inited = 1;
        }
-}
-/* Initialize the mutex governing access to the SysV shm subsystem */
-__private_extern__ void
-sysv_shm_lock_init( void )
-{
 
-       sysv_shm_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
-       lck_grp_attr_setstat(sysv_shm_subsys_lck_grp_attr);
-       
-       sysv_shm_subsys_lck_grp = lck_grp_alloc_init("sysv_shm_subsys_lock", sysv_shm_subsys_lck_grp_attr);
-       
-       sysv_shm_subsys_lck_attr = lck_attr_alloc_init();
-       /* lck_attr_setdebug(sysv_shm_subsys_lck_attr); */
-       lck_mtx_init(&sysv_shm_subsys_mutex, sysv_shm_subsys_lck_grp, sysv_shm_subsys_lck_attr);
+       return 0;
 }
 
 /* (struct sysctl_oid *oidp, void *arg1, int arg2, \
-        struct sysctl_req *req) */
+ *       struct sysctl_req *req) */
 static int
 sysctl_shminfo(__unused struct sysctl_oid *oidp, void *arg1,
-       __unused int arg2, struct sysctl_req *req)
+    __unused int arg2, struct sysctl_req *req)
 {
        int error = 0;
        int sysctl_shminfo_ret = 0;
-
-       error = SYSCTL_OUT(req, arg1, sizeof(user_ssize_t));
-       if (error || req->newptr == USER_ADDR_NULL)
-               return(error);
+       int64_t saved_shmmax;
+       int64_t saved_shmmin;
+       int64_t saved_shmseg;
+       int64_t saved_shmmni;
+       int64_t saved_shmall;
+
+       error = SYSCTL_OUT(req, arg1, sizeof(int64_t));
+       if (error || req->newptr == USER_ADDR_NULL) {
+               return error;
+       }
 
        SYSV_SHM_SUBSYS_LOCK();
-       /* Set the values only if shared memory is not initialised */
-       if (!shm_inited) {
-               if ((error = SYSCTL_IN(req, arg1, sizeof(user_ssize_t))) 
-                   != 0) {
-                       sysctl_shminfo_ret = error;
+
+       /* shmmni can not be changed after SysV SHM has been initialized */
+       if (shm_inited && arg1 == &shminfo.shmmni) {
+               sysctl_shminfo_ret = EPERM;
+               goto sysctl_shminfo_out;
+       }
+       saved_shmmax = shminfo.shmmax;
+       saved_shmmin = shminfo.shmmin;
+       saved_shmseg = shminfo.shmseg;
+       saved_shmmni = shminfo.shmmni;
+       saved_shmall = shminfo.shmall;
+
+       if ((error = SYSCTL_IN(req, arg1, sizeof(int64_t))) != 0) {
+               sysctl_shminfo_ret = error;
+               goto sysctl_shminfo_out;
+       }
+
+       if (arg1 == &shminfo.shmmax) {
+               /* shmmax needs to be page-aligned */
+               if (shminfo.shmmax & PAGE_MASK_64 || shminfo.shmmax < 0) {
+                       shminfo.shmmax = saved_shmmax;
+                       sysctl_shminfo_ret = EINVAL;
                        goto sysctl_shminfo_out;
                }
-
-               if (arg1 == &shminfo.shmmax) {
-                       if (shminfo.shmmax & PAGE_MASK_64) {
-                               shminfo.shmmax = (user_ssize_t)-1;
-                               sysctl_shminfo_ret = EINVAL;
-                               goto sysctl_shminfo_out;
-                       }
+       } else if (arg1 == &shminfo.shmmin) {
+               if (shminfo.shmmin < 0) {
+                       shminfo.shmmin = saved_shmmin;
+                       sysctl_shminfo_ret = EINVAL;
+                       goto sysctl_shminfo_out;
                }
-
-               /* Initialize only when all values are set */
-               if ((shminfo.shmmax != (user_ssize_t)-1) &&
-                       (shminfo.shmmin != (user_ssize_t)-1) && 
-                       (shminfo.shmmni != (user_ssize_t)-1) &&
-                       (shminfo.shmseg != (user_ssize_t)-1) &&
-                       (shminfo.shmall != (user_ssize_t)-1)) {
-                               shminit(NULL);
+       } else if (arg1 == &shminfo.shmseg) {
+               /* add a sanity check - 20847256 */
+               if (shminfo.shmseg > INT32_MAX || shminfo.shmseg < 0) {
+                       shminfo.shmseg = saved_shmseg;
+                       sysctl_shminfo_ret = EINVAL;
+                       goto sysctl_shminfo_out;
+               }
+       } else if (arg1 == &shminfo.shmmni) {
+               /* add a sanity check - 20847256 */
+               if (shminfo.shmmni > INT32_MAX || shminfo.shmmni < 0) {
+                       shminfo.shmmni = saved_shmmni;
+                       sysctl_shminfo_ret = EINVAL;
+                       goto sysctl_shminfo_out;
+               }
+       } else if (arg1 == &shminfo.shmall) {
+               /* add a sanity check - 20847256 */
+               if (shminfo.shmall > INT32_MAX || shminfo.shmall < 0) {
+                       shminfo.shmall = saved_shmall;
+                       sysctl_shminfo_ret = EINVAL;
+                       goto sysctl_shminfo_out;
                }
        }
        sysctl_shminfo_ret = 0;
@@ -885,32 +1185,30 @@ sysctl_shminfo_out:
 
 static int
 IPCS_shm_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
-       __unused int arg2, struct sysctl_req *req)
+    __unused int arg2, struct sysctl_req *req)
 {
        int error;
        int cursor;
        union {
-               struct IPCS_command u32;
+               struct user32_IPCS_command u32;
                struct user_IPCS_command u64;
-       } ipcs;
-       struct shmid_ds shmid_ds32;     /* post conversion, 32 bit version */
+       } ipcs = { };
+       struct user32_shmid_ds shmid_ds32 = { }; /* post conversion, 32 bit version */
+       struct user_shmid_ds   shmid_ds = { };   /* 64 bit version */
        void *shmid_dsp;
        size_t ipcs_sz = sizeof(struct user_IPCS_command);
        size_t shmid_ds_sz = sizeof(struct user_shmid_ds);
        struct proc *p = current_proc();
 
-       int ipcs__shminfo_ret = 0;
-
        SYSV_SHM_SUBSYS_LOCK();
 
-       if (!shm_inited) {
-               error = EINVAL;
+       if ((error = shminit())) {
                goto ipcs_shm_sysctl_out;
        }
 
        if (!IS_64BIT_PROCESS(p)) {
-               ipcs_sz = sizeof(struct IPCS_command);
-               shmid_ds_sz = sizeof(struct shmid_ds);
+               ipcs_sz = sizeof(struct user32_IPCS_command);
+               shmid_ds_sz = sizeof(struct user32_shmid_ds);
        }
 
        /* Copy in the command structure */
@@ -918,8 +1216,9 @@ IPCS_shm_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
                goto ipcs_shm_sysctl_out;
        }
 
-       if (!IS_64BIT_PROCESS(p))       /* convert in place */
+       if (!IS_64BIT_PROCESS(p)) {     /* convert in place */
                ipcs.u64.ipcs_data = CAST_USER_ADDR_T(ipcs.u32.ipcs_data);
+       }
 
        /* Let us version this interface... */
        if (ipcs.u64.ipcs_magic != IPCS_MAGIC) {
@@ -927,8 +1226,8 @@ IPCS_shm_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
                goto ipcs_shm_sysctl_out;
        }
 
-       switch(ipcs.u64.ipcs_op) {
-       case IPCS_SHM_CONF:     /* Obtain global configuration data */
+       switch (ipcs.u64.ipcs_op) {
+       case IPCS_SHM_CONF:     /* Obtain global configuration data */
                if (ipcs.u64.ipcs_datalen != sizeof(struct shminfo)) {
                        if (ipcs.u64.ipcs_cursor != 0) { /* fwd. compat. */
                                error = ENOMEM;
@@ -940,19 +1239,20 @@ IPCS_shm_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
                error = copyout(&shminfo, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
                break;
 
-       case IPCS_SHM_ITER:     /* Iterate over existing segments */
+       case IPCS_SHM_ITER:     /* Iterate over existing segments */
                cursor = ipcs.u64.ipcs_cursor;
                if (cursor < 0 || cursor >= shminfo.shmmni) {
                        error = ERANGE;
                        break;
                }
                if (ipcs.u64.ipcs_datalen != (int)shmid_ds_sz) {
-                       error = ENOMEM;
+                       error = EINVAL;
                        break;
                }
-               for( ; cursor < shminfo.shmmni; cursor++) {
-                       if (shmsegs[cursor].shm_perm.mode & SHMSEG_ALLOCATED)
+               for (; cursor < shminfo.shmmni; cursor++) {
+                       if (shmsegs[cursor].u.shm_perm.mode & SHMSEG_ALLOCATED) {
                                break;
+                       }
                        continue;
                }
                if (cursor == shminfo.shmmni) {
@@ -960,7 +1260,7 @@ IPCS_shm_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
                        break;
                }
 
-               shmid_dsp = &shmsegs[cursor];   /* default: 64 bit */
+               shmid_dsp = &shmsegs[cursor];   /* default: 64 bit */
 
                /*
                 * If necessary, convert the 64 bit kernel segment
@@ -968,15 +1268,28 @@ IPCS_shm_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
                 */
                if (!IS_64BIT_PROCESS(p)) {
                        shmid_ds_64to32(shmid_dsp, &shmid_ds32);
+
+                       /* Clear kernel reserved pointer before copying to user space */
+                       shmid_ds32.shm_internal = (user32_addr_t)0;
+
                        shmid_dsp = &shmid_ds32;
+               } else {
+                       memcpy(&shmid_ds, shmid_dsp, sizeof(shmid_ds));
+
+                       /* Clear kernel reserved pointer before copying to user space */
+                       shmid_ds.shm_internal = USER_ADDR_NULL;
+
+                       shmid_dsp = &shmid_ds;
                }
                error = copyout(shmid_dsp, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
                if (!error) {
                        /* update cursor */
                        ipcs.u64.ipcs_cursor = cursor + 1;
 
-               if (!IS_64BIT_PROCESS(p))       /* convert in place */
-                       ipcs.u32.ipcs_data = CAST_DOWN(void *,ipcs.u64.ipcs_data);
+                       if (!IS_64BIT_PROCESS(p)) { /* convert in place */
+                               ipcs.u32.ipcs_data = CAST_DOWN_EXPLICIT(user32_addr_t, ipcs.u64.ipcs_data);
+                       }
+
                        error = SYSCTL_OUT(req, &ipcs, ipcs_sz);
                }
                break;
@@ -987,29 +1300,32 @@ IPCS_shm_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
        }
 ipcs_shm_sysctl_out:
        SYSV_SHM_SUBSYS_UNLOCK();
-       return(error);
+       return error;
 }
 
-SYSCTL_NODE(_kern, KERN_SYSV, sysv, CTLFLAG_RW, 0, "SYSV");
+SYSCTL_NODE(_kern, KERN_SYSV, sysv, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, 0, "SYSV");
+
+SYSCTL_PROC(_kern_sysv, OID_AUTO, shmmax, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+    &shminfo.shmmax, 0, &sysctl_shminfo, "Q", "shmmax");
 
-SYSCTL_PROC(_kern_sysv, KSYSV_SHMMAX, shmmax, CTLTYPE_QUAD | CTLFLAG_RW,
-    &shminfo.shmmax, 0, &sysctl_shminfo ,"Q","shmmax");
+SYSCTL_PROC(_kern_sysv, OID_AUTO, shmmin, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+    &shminfo.shmmin, 0, &sysctl_shminfo, "Q", "shmmin");
 
-SYSCTL_PROC(_kern_sysv, KSYSV_SHMMIN, shmmin, CTLTYPE_QUAD | CTLFLAG_RW,
-    &shminfo.shmmin, 0, &sysctl_shminfo ,"Q","shmmin");
+SYSCTL_PROC(_kern_sysv, OID_AUTO, shmmni, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+    &shminfo.shmmni, 0, &sysctl_shminfo, "Q", "shmmni");
 
-SYSCTL_PROC(_kern_sysv, KSYSV_SHMMNI, shmmni, CTLTYPE_QUAD | CTLFLAG_RW,
-    &shminfo.shmmni, 0, &sysctl_shminfo ,"Q","shmmni");
+SYSCTL_PROC(_kern_sysv, OID_AUTO, shmseg, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+    &shminfo.shmseg, 0, &sysctl_shminfo, "Q", "shmseg");
 
-SYSCTL_PROC(_kern_sysv, KSYSV_SHMSEG, shmseg, CTLTYPE_QUAD | CTLFLAG_RW,
-    &shminfo.shmseg, 0, &sysctl_shminfo ,"Q","shmseg");
+SYSCTL_PROC(_kern_sysv, OID_AUTO, shmall, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+    &shminfo.shmall, 0, &sysctl_shminfo, "Q", "shmall");
 
-SYSCTL_PROC(_kern_sysv, KSYSV_SHMALL, shmall, CTLTYPE_QUAD | CTLFLAG_RW,
-    &shminfo.shmall, 0, &sysctl_shminfo ,"Q","shmall");
+SYSCTL_NODE(_kern_sysv, OID_AUTO, ipcs, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, 0, "SYSVIPCS");
 
-SYSCTL_NODE(_kern_sysv, OID_AUTO, ipcs, CTLFLAG_RW, 0, "SYSVIPCS");
+SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, shm, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
+    0, 0, IPCS_shm_sysctl,
+    "S,IPCS_shm_command",
+    "ipcs shm command interface");
+#endif /* SYSV_SHM */
 
-SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, shm, CTLFLAG_RW|CTLFLAG_ANYBODY,
-       0, 0, IPCS_shm_sysctl,
-       "S,IPCS_shm_command",
-       "ipcs shm command interface");
+/* DSEP Review Done pl-20051108-v02 @2743,@2908,@2913,@3009 */