]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/ipc/mach_msg.c
xnu-1228.3.13.tar.gz
[apple/xnu.git] / osfmk / ipc / mach_msg.c
index 6036e409d83137fc4864f4202e90150c6ef7a281..065f3b40fcff2b1c5875fe1c703558157cdc83e6 100644 (file)
@@ -1,16 +1,19 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
  * 
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
@@ -20,7 +23,7 @@
  * Please see the License for the specific language governing rights and
  * limitations under the License.
  * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_COPYRIGHT@
  * any improvements or extensions that they make and grant Carnegie Mellon
  * the rights to redistribute these changes.
  */
+/*
+ * NOTICE: This file was modified by McAfee Research in 2004 to introduce
+ * support for mandatory and extensible security protections.  This notice
+ * is included in support of clause 2.2 (b) of the Apple Public License,
+ * Version 2.0.
+ * Copyright (c) 2005 SPARTA, Inc.
+ */
 /*
  */
 /*
  *     Exported message traps.  See mach/message.h.
  */
 
-#include <cpus.h>
-
+#include <mach/mach_types.h>
 #include <mach/kern_return.h>
 #include <mach/port.h>
 #include <mach/message.h>
 #include <mach/mig_errors.h>
+#include <mach/mach_traps.h>
+
+#include <kern/kern_types.h>
 #include <kern/assert.h>
 #include <kern/counters.h>
 #include <kern/cpu_number.h>
+#include <kern/ipc_kobject.h>
+#include <kern/ipc_mig.h>
 #include <kern/task.h>
 #include <kern/thread.h>
 #include <kern/lock.h>
 #include <kern/sched_prim.h>
 #include <kern/exception.h>
 #include <kern/misc_protos.h>
+#include <kern/kalloc.h>
+#include <kern/processor.h>
+#include <kern/syscall_subr.h>
+
 #include <vm/vm_map.h>
+
+#include <ipc/ipc_types.h>
 #include <ipc/ipc_kmsg.h>
 #include <ipc/ipc_mqueue.h>
 #include <ipc/ipc_object.h>
 #include <ipc/ipc_pset.h>
 #include <ipc/ipc_space.h>
 #include <ipc/ipc_entry.h>
-#include <kern/kalloc.h>
-#include <kern/thread_swap.h>
-#include <kern/processor.h>
-
-#include <kern/mk_sp.h>
 
 #include <machine/machine_routines.h>
+#include <security/mac_mach_internal.h>
+
 #include <sys/kdebug.h>
 
+
+#ifndef offsetof
+#define offsetof(type, member)  ((size_t)(&((type *)0)->member))
+#endif /* offsetof */
+
 /*
- * Forward declarations
+ * Forward declarations - kernel internal routines
  */
 
 mach_msg_return_t mach_msg_send(
        mach_msg_header_t       *msg,
        mach_msg_option_t       option,
        mach_msg_size_t         send_size,
-       mach_msg_timeout_t      timeout,
+       mach_msg_timeout_t      send_timeout,
        mach_port_name_t        notify);
 
 mach_msg_return_t mach_msg_receive(
@@ -109,19 +131,22 @@ mach_msg_return_t mach_msg_receive(
        mach_msg_option_t       option,
        mach_msg_size_t         rcv_size,
        mach_port_name_t        rcv_name,
-       mach_msg_timeout_t      timeout,
+       mach_msg_timeout_t      rcv_timeout,
        void                    (*continuation)(mach_msg_return_t),
        mach_msg_size_t         slist_size);
 
 
+mach_msg_return_t mach_msg_receive_results(void);
+
 mach_msg_return_t msg_receive_error(
        ipc_kmsg_t              kmsg,
-       mach_msg_header_t       *msg,
+       mach_vm_address_t       msg_addr,
        mach_msg_option_t       option,
        mach_port_seqno_t       seqno,
        ipc_space_t             space);
 
 security_token_t KERNEL_SECURITY_TOKEN = KERNEL_SECURITY_TOKEN_VALUE;
+audit_token_t KERNEL_AUDIT_TOKEN = KERNEL_AUDIT_TOKEN_VALUE;
 
 mach_msg_format_0_trailer_t trailer_template = {
        /* mach_msg_trailer_type_t */ MACH_MSG_TRAILER_FORMAT_0,
@@ -160,19 +185,45 @@ mach_msg_send(
        mach_msg_header_t       *msg,
        mach_msg_option_t       option,
        mach_msg_size_t         send_size,
-       mach_msg_timeout_t      timeout,
+       mach_msg_timeout_t      send_timeout,
        mach_port_name_t        notify)
 {
        ipc_space_t space = current_space();
        vm_map_t map = current_map();
        ipc_kmsg_t kmsg;
        mach_msg_return_t mr;
+       mach_msg_size_t msg_and_trailer_size;
+       mach_msg_max_trailer_t  *trailer;
 
-       mr = ipc_kmsg_get(msg, send_size, &kmsg);
+       if ((send_size < sizeof(mach_msg_header_t)) || (send_size & 3))
+               return MACH_SEND_MSG_TOO_SMALL;
 
-       if (mr != MACH_MSG_SUCCESS)
-               return mr;
+       if (send_size > MACH_MSG_SIZE_MAX - MAX_TRAILER_SIZE)
+               return MACH_SEND_TOO_LARGE;
+       
+       msg_and_trailer_size = send_size + MAX_TRAILER_SIZE;
+
+       kmsg = ipc_kmsg_alloc(msg_and_trailer_size);
 
+       if (kmsg == IKM_NULL)
+               return MACH_SEND_NO_BUFFER;
+
+       (void) memcpy((void *) kmsg->ikm_header, (const void *) msg, send_size);
+
+       kmsg->ikm_header->msgh_size = send_size;
+
+       /* 
+        * reserve for the trailer the largest space (MAX_TRAILER_SIZE)
+        * However, the internal size field of the trailer (msgh_trailer_size)
+        * is initialized to the minimum (sizeof(mach_msg_trailer_t)), to optimize
+        * the cases where no implicit data is requested.
+        */
+       trailer = (mach_msg_max_trailer_t *) ((vm_offset_t)kmsg->ikm_header + send_size);
+       trailer->msgh_sender = current_thread()->task->sec_token;
+       trailer->msgh_audit = current_thread()->task->audit_token;
+       trailer->msgh_trailer_type = MACH_MSG_TRAILER_FORMAT_0;
+       trailer->msgh_trailer_size = MACH_MSG_TRAILER_MINIMUM_SIZE;
+       
        if (option & MACH_SEND_CANCEL) {
                if (notify == MACH_PORT_NULL)
                        mr = MACH_SEND_INVALID_NOTIFY;
@@ -185,11 +236,13 @@ mach_msg_send(
                return mr;
        }
 
-       mr = ipc_kmsg_send(kmsg, option & MACH_SEND_TIMEOUT, timeout);
+       mr = ipc_kmsg_send(kmsg, option & MACH_SEND_TIMEOUT, send_timeout);
 
        if (mr != MACH_MSG_SUCCESS) {
            mr |= ipc_kmsg_copyout_pseudo(kmsg, space, map, MACH_MSG_BODY_NULL);
-           (void) ipc_kmsg_put(msg, kmsg, kmsg->ikm_header.msgh_size);
+           (void) memcpy((void *) msg, (const void *) kmsg->ikm_header, 
+                         kmsg->ikm_header->msgh_size);
+           ipc_kmsg_free(kmsg);
        }
 
        return mr;
@@ -225,13 +278,12 @@ mach_msg_receive_results(void)
 
        ipc_object_t      object = self->ith_object;
        mach_msg_return_t mr = self->ith_state;
-       mach_msg_header_t *msg = self->ith_msg;
+       mach_vm_address_t msg_addr = self->ith_msg_addr;
        mach_msg_option_t option = self->ith_option;
        ipc_kmsg_t        kmsg = self->ith_kmsg;
        mach_port_seqno_t seqno = self->ith_seqno;
-       mach_msg_size_t   slist_size = self->ith_scatter_list_size;
 
-       mach_msg_format_0_trailer_t *trailer;
+       mach_msg_max_trailer_t *trailer;
 
        ipc_object_release(object);
 
@@ -246,25 +298,76 @@ mach_msg_receive_results(void)
               * the queue).
               */
              if (copyout((char *) &self->ith_msize,
-                         (char *) &msg->msgh_size,
+                         msg_addr + offsetof(mach_msg_header_t, msgh_size),
                          sizeof(mach_msg_size_t)))
                mr = MACH_RCV_INVALID_DATA;
              goto out;
            }
                  
-           if (msg_receive_error(kmsg, msg, option, seqno, space)
+           if (msg_receive_error(kmsg, msg_addr, option, seqno, space)
                == MACH_RCV_INVALID_DATA)
              mr = MACH_RCV_INVALID_DATA;
          }
          goto out;
        }
 
-       trailer = (mach_msg_format_0_trailer_t *)
-                       ((vm_offset_t)&kmsg->ikm_header +
-                       round_msg(kmsg->ikm_header.msgh_size));
+       trailer = (mach_msg_max_trailer_t *)
+                       ((vm_offset_t)kmsg->ikm_header +
+                       round_msg(kmsg->ikm_header->msgh_size));
        if (option & MACH_RCV_TRAILER_MASK) {
                trailer->msgh_seqno = seqno;
                trailer->msgh_trailer_size = REQUESTED_TRAILER_SIZE(option);
+
+
+               if (option & MACH_RCV_TRAILER_ELEMENTS (MACH_RCV_TRAILER_AV)) {
+#if CONFIG_MACF_MACH
+                 if (kmsg->ikm_sender != NULL &&
+                   IP_VALID(kmsg->ikm_header->msgh_remote_port) &&
+                   mac_port_check_method(kmsg->ikm_sender,
+                   &kmsg->ikm_sender->maclabel,
+                   &((ipc_port_t)kmsg->ikm_header->msgh_remote_port)->ip_label,
+                   kmsg->ikm_header->msgh_id) == 0)
+                     trailer->msgh_ad = 1;
+                 else
+#endif
+                     trailer->msgh_ad = 0;
+               }
+
+               /*
+                * The ipc_kmsg_t holds a reference to the label of a label
+                * handle, not the port. We must get a reference to the port
+                * and a send right to copyout to the receiver.
+                */
+
+               if (option & MACH_RCV_TRAILER_ELEMENTS (MACH_RCV_TRAILER_LABELS)) {
+#if CONFIG_MACF_MACH
+                 if (kmsg->ikm_sender != NULL) {
+                   ipc_labelh_t  lh = kmsg->ikm_sender->label;
+                   kern_return_t kr;
+
+                   ip_lock(lh->lh_port);
+                   lh->lh_port->ip_mscount++;
+                   lh->lh_port->ip_srights++;
+                   ip_reference(lh->lh_port);
+                   ip_unlock(lh->lh_port);
+
+                   kr = ipc_object_copyout(space, (ipc_object_t)lh->lh_port,
+                                           MACH_MSG_TYPE_PORT_SEND, 0,
+                                           &trailer->msgh_labels.sender);
+                   if (kr != KERN_SUCCESS) {
+                     ip_lock(lh->lh_port);
+                     ip_release(lh->lh_port);
+                     ip_check_unlock(lh->lh_port);
+
+                     trailer->msgh_labels.sender = 0;
+                   }
+                 } else {
+                   trailer->msgh_labels.sender = 0;
+                 }
+#else
+                   trailer->msgh_labels.sender = 0;
+#endif
+               }
        }
 
        /*
@@ -277,7 +380,7 @@ mach_msg_receive_results(void)
                mach_msg_size_t slist_size = self->ith_scatter_list_size;
                mach_msg_body_t *slist;
 
-               slist = ipc_kmsg_copyin_scatter(msg, slist_size, kmsg);
+               slist = ipc_kmsg_get_scatter(msg_addr, slist_size, kmsg);
                mr = ipc_kmsg_copyout(kmsg, space, map, MACH_PORT_NULL, slist);
                ipc_kmsg_free_scatter(slist, slist_size);
        } else {
@@ -287,20 +390,20 @@ mach_msg_receive_results(void)
 
        if (mr != MACH_MSG_SUCCESS) {
                if ((mr &~ MACH_MSG_MASK) == MACH_RCV_BODY_ERROR) {
-                       if (ipc_kmsg_put(msg, kmsg, kmsg->ikm_header.msgh_size +
+                       if (ipc_kmsg_put(msg_addr, kmsg, kmsg->ikm_header->msgh_size +
                           trailer->msgh_trailer_size) == MACH_RCV_INVALID_DATA)
                                mr = MACH_RCV_INVALID_DATA;
                } 
                else {
-                       if (msg_receive_error(kmsg, msg, option, seqno, space) 
+                       if (msg_receive_error(kmsg, msg_addr, option, seqno, space) 
                                                == MACH_RCV_INVALID_DATA)
                                mr = MACH_RCV_INVALID_DATA;
                }
                goto out;
        }
-       mr = ipc_kmsg_put(msg,
+       mr = ipc_kmsg_put(msg_addr,
                          kmsg,
-                         kmsg->ikm_header.msgh_size + 
+                         kmsg->ikm_header->msgh_size + 
                          trailer->msgh_trailer_size);
  out:
        return mr;
@@ -312,20 +415,15 @@ mach_msg_receive(
        mach_msg_option_t       option,
        mach_msg_size_t         rcv_size,
        mach_port_name_t        rcv_name,
-       mach_msg_timeout_t      timeout,
+       mach_msg_timeout_t      rcv_timeout,
        void                    (*continuation)(mach_msg_return_t),
        mach_msg_size_t         slist_size)
 {
        thread_t self = current_thread();
        ipc_space_t space = current_space();
-       vm_map_t map = current_map();
        ipc_object_t object;
        ipc_mqueue_t mqueue;
-       ipc_kmsg_t kmsg;
-       mach_port_seqno_t seqno;
        mach_msg_return_t mr;
-       mach_msg_body_t *slist;
-       mach_msg_format_0_trailer_t *trailer;
 
        mr = ipc_mqueue_copyin(space, rcv_name, &mqueue, &object);
        if (mr != MACH_MSG_SUCCESS) {
@@ -333,16 +431,16 @@ mach_msg_receive(
        }
        /* hold ref for object */
 
-       self->ith_msg = msg;
+       self->ith_msg_addr = CAST_DOWN(mach_vm_address_t, msg);
        self->ith_object = object;
        self->ith_msize = rcv_size;
        self->ith_option = option;
        self->ith_scatter_list_size = slist_size;
        self->ith_continuation = continuation;
 
-       ipc_mqueue_receive(mqueue, option, rcv_size, timeout, THREAD_ABORTSAFE);
-       if ((option & MACH_RCV_TIMEOUT) && timeout == 0)
-               _mk_sp_thread_perhaps_yield(self);
+       ipc_mqueue_receive(mqueue, option, rcv_size, rcv_timeout, THREAD_ABORTSAFE);
+       if ((option & MACH_RCV_TIMEOUT) && rcv_timeout == 0)
+               thread_poll_yield(self);
        return mach_msg_receive_results();
 }
 
@@ -527,7 +625,11 @@ unsigned int c_mmot_kernel_send = 0;               /* kernel server calls  */
 
 #endif /* !HOTPATH_DEBUG */
 
+#if CONFIG_MACF_MACH
+boolean_t enable_hotpath = FALSE;      /* XXX - push MAC into HOTPATH too */
+#else
 boolean_t enable_hotpath = TRUE;       /* Patchable, just in case ...  */
+#endif
 #endif /* HOTPATH_ENABLE */
 
 /*
@@ -542,36 +644,35 @@ boolean_t enable_hotpath = TRUE;  /* Patchable, just in case ...  */
 
 mach_msg_return_t
 mach_msg_overwrite_trap(
-       mach_msg_header_t       *msg,
-       mach_msg_option_t       option,
-       mach_msg_size_t         send_size,
-       mach_msg_size_t         rcv_size,
-       mach_port_name_t        rcv_name,
-       mach_msg_timeout_t      timeout,
-       mach_port_name_t        notify,
-       mach_msg_header_t       *rcv_msg,
-        mach_msg_size_t                scatter_list_size)
+       struct mach_msg_overwrite_trap_args *args)
 {
-       register mach_msg_header_t *hdr;
+       mach_vm_address_t       msg_addr = args->msg;
+       mach_msg_option_t       option = args->option;
+       mach_msg_size_t         send_size = args->send_size;
+       mach_msg_size_t         rcv_size = args->rcv_size;
+       mach_port_name_t        rcv_name = args->rcv_name;
+       mach_msg_timeout_t      msg_timeout = args->timeout;
+       mach_port_name_t        notify = args->notify;
+       mach_vm_address_t       rcv_msg_addr = args->rcv_msg;
+        mach_msg_size_t                scatter_list_size = 0; /* NOT INITIALIZED - but not used in pactice */
+       __unused mach_port_seqno_t temp_seqno = 0;
+
        mach_msg_return_t  mr = MACH_MSG_SUCCESS;
+#if    ENABLE_HOTPATH
        /* mask out some of the options before entering the hot path */
        mach_msg_option_t  masked_option = 
                option & ~(MACH_SEND_TRAILER|MACH_RCV_TRAILER_MASK|MACH_RCV_LARGE);
-       int i;
+       register mach_msg_header_t *hdr;
 
-#if    ENABLE_HOTPATH
-       /* BEGINNING OF HOT PATH */
        if ((masked_option == (MACH_SEND_MSG|MACH_RCV_MSG)) && enable_hotpath) {
-               register thread_t self = current_thread();
-               register mach_msg_format_0_trailer_t *trailer;
-
-               ipc_space_t space = current_act()->task->itk_space;
+               thread_t self = current_thread();
+               mach_msg_format_0_trailer_t *trailer;
+               ipc_space_t space = self->task->itk_space;
                ipc_kmsg_t kmsg;
                register ipc_port_t dest_port;
                ipc_object_t rcv_object;
-               register ipc_mqueue_t rcv_mqueue;
+               ipc_mqueue_t rcv_mqueue;
                mach_msg_size_t reply_size;
-               ipc_kmsg_t rcv_kmsg;
 
                c_mmot_combined_S_R++;
 
@@ -611,16 +712,17 @@ mach_msg_overwrite_trap(
                 *              server finds waiting messages and can't block.
                 */
 
-               mr = ipc_kmsg_get(msg, send_size, &kmsg);
+               mr = ipc_kmsg_get(msg_addr, send_size, &kmsg);
                if (mr != KERN_SUCCESS) {
                        return mr;
                }
-               hdr = &kmsg->ikm_header;
+               hdr = kmsg->ikm_header;
                trailer = (mach_msg_format_0_trailer_t *) ((vm_offset_t) hdr +
                                                           send_size);
 
-           fast_copyin:
                /*
+                * fast_copyin:
+                *
                 *      optimized ipc_kmsg_copyin/ipc_mqueue_copyin
                 *
                 *      We have the request message data in kmsg.
@@ -678,6 +780,7 @@ mach_msg_overwrite_trap(
                                }
                        } else {
                                entry = IE_NULL;
+                               bits = 0;
                        }
                        if (entry == IE_NULL) {
                                entry = ipc_entry_lookup(space, reply_name);
@@ -727,6 +830,7 @@ mach_msg_overwrite_trap(
                                }
                        } else {
                                entry = IE_NULL;
+                               bits = 0;
                        }
                        if (entry == IE_NULL) {
                                entry = ipc_entry_lookup(space, dest_name);
@@ -857,7 +961,6 @@ mach_msg_overwrite_trap(
                        register ipc_entry_t entry;
                        register mach_port_gen_t gen;
                        register mach_port_index_t index;
-                       ipc_table_index_t *requests;
 
                    {
                        register mach_port_name_t dest_name =
@@ -947,6 +1050,7 @@ mach_msg_overwrite_trap(
                                }
                        } else {
                                entry = IE_NULL;
+                               bits = 0;
                        }
                        if (entry == IE_NULL) {
                                entry = ipc_entry_lookup(space, rcv_name);
@@ -1057,9 +1161,6 @@ mach_msg_overwrite_trap(
                  register ipc_mqueue_t dest_mqueue;
                  wait_queue_t waitq;
                  thread_t receiver;
-#if    THREAD_SWAPPER
-                 thread_act_t rcv_act;
-#endif
                  spl_t s;
 
                  s = splsched();
@@ -1067,166 +1168,81 @@ mach_msg_overwrite_trap(
                  waitq = &dest_mqueue->imq_wait_queue;
                  imq_lock(dest_mqueue);
 
-                 wait_queue_peek64_locked(waitq, IPC_MQUEUE_RECEIVE, &receiver, &waitq);
-                 /* queue still locked, thread locked - but still on q */
-
-                 if (receiver == THREAD_NULL) {
-                 abort_send_receive:
-                       imq_unlock(dest_mqueue);
-                       splx(s);
-                       ip_unlock(dest_port);
-                       ipc_object_release(rcv_object);
-                       HOT(c_mmot_cold_032++);
-                       goto slow_send;
-                 }
-
-                 assert(receiver->wait_queue == waitq);
-                 assert(receiver->wait_event == IPC_MQUEUE_RECEIVE);
-               
-                 /*
-                  * See if it is still running on another processor (trying to
-                  * block itself).  If so, fall off.
-                  *
-                  * JMM - We have an opportunity here.  Since the thread is locked
-                  * and we find it runnable, it must still be trying to get into
-                  * thread_block on itself.  We could just "hand him the message"
-                  * and let him go (thread_go_locked()) and then fall down into a
-                  * slow receive for ourselves.  Only his RECEIVE_TOO_LARGE handling
-                  * runs afoul of that.  Clean this up!
-                  */
-                 if ((receiver->state & (TH_RUN|TH_WAIT)) != TH_WAIT) {
-                       assert(NCPUS > 1);
-                       HOT(c_mmot_cold_033++);
-               fall_off:
-                       thread_unlock(receiver);
-                       if (waitq != &dest_mqueue->imq_wait_queue)
-                               wait_queue_unlock(waitq);
-                       goto abort_send_receive;
-                 }
-
-                 /*
-                  * Check that the receiver can stay on the hot path.
-                  */
-                 if (send_size + REQUESTED_TRAILER_SIZE(receiver->ith_option) >
-                         receiver->ith_msize) {
-                       /*
-                        *      The receiver can't accept the message.
-                        */
-                       HOT(c_mmot_bad_rcvr++);
-                       goto fall_off;
-                 }
-
-#if    THREAD_SWAPPER
-                 /*
-                  * Receiver looks okay -- is it swapped in?
-                  */
-                 rcv_act = receiver->top_act;
-                 if (rcv_act->swap_state != TH_SW_IN &&
-                         rcv_act->swap_state != TH_SW_UNSWAPPABLE) {
-                       HOT(c_mmot_rcvr_swapped++);
-                       goto fall_off;
-                 }
-
-                 /*
-                  * Make sure receiver stays swapped in (if we can).
-                  */
-                 if (!act_lock_try(rcv_act)) { /* out of order! */
-                       HOT(c_mmot_rcvr_locked++);
-                       goto fall_off;
-                 }
-               
-                 /*
-                  * Check for task swapping in progress affecting
-                  * receiver.  Since rcv_act is attached to a shuttle,
-                  * its swap_state is covered by shuttle's thread_lock()
-                  * (sigh).
-                  */
-                 if ((rcv_act->swap_state != TH_SW_IN &&
-                       rcv_act->swap_state != TH_SW_UNSWAPPABLE) ||
-                       rcv_act->ast & AST_SWAPOUT) {
-                       act_unlock(rcv_act);
-                       HOT(c_mmot_rcvr_tswapped++);
-                       goto fall_off;
+           get_next_receiver:
+                 receiver = wait_queue_wakeup64_identity_locked(waitq,
+                                                               IPC_MQUEUE_RECEIVE, 
+                                                               THREAD_AWAKENED,
+                                                               FALSE);
+                 /* queue still locked, receiver thread locked (if any) */
+
+                 if (  receiver == THREAD_NULL ) {
+                         imq_unlock(dest_mqueue);
+                         splx(s);
+
+                         ip_unlock(dest_port);
+                         ipc_object_release(rcv_object);
+                         HOT(c_mmot_cold_032++);
+                         goto slow_send;
                  }
 
                  /*
-                  * We don't need to make receiver unswappable here -- holding
-                  * act_lock() of rcv_act is sufficient to prevent either thread
-                  * or task swapping from changing its state (see swapout_scan(),
-                  * task_swapout()).  Don't release lock till receiver's state
-                  * is consistent.  Its task may then be marked for swapout,
-                  * but that's life.
-                  */
-                 /*
-                  * NB:  act_lock(rcv_act) still held
-                  */
-#endif /* THREAD_SWAPPER */
-
-                 /*
-                  * Before committing to the handoff, make sure that we are
-                  * really going to block (i.e. there are no messages already
-                  * queued for us.  This violates lock ordering, so make sure
-                  * we don't deadlock. After the trylock succeeds below, we
-                  * may have up to 3 message queues locked:
-                  *    - the dest port mqueue
-                  *    - a portset mqueue (where waiting receiver was found)
-                  *    - finally our own rcv_mqueue
+                  * Check that the receiver can handle the size of the message.
+                  * If not, and the receiver just wants to be informed of that
+                  * fact, set it running and try to find another thread.
                   *
-                  * JMM - Need to make this check appropriate for portsets as
-                  * well before re-enabling them.
+                  * If he didn't want the "too large" message left on the queue,
+                  * give it to him anyway, he'll consume it as part of his receive
+                  * processing.
                   */
-                 if (!imq_lock_try(rcv_mqueue)) {
-                       goto fall_off;
-                 }
-                 if (ipc_kmsg_queue_first(&rcv_mqueue->imq_messages) != IKM_NULL) {
-                       imq_unlock(rcv_mqueue);
-                       HOT(c_mmot_cold_033++);
-                       goto fall_off;
+                 if (receiver->ith_msize < 
+                     ipc_kmsg_copyout_size(kmsg, receiver->map) + 
+                     REQUESTED_TRAILER_SIZE(receiver->ith_option))
+                 {
+                         receiver->ith_msize = kmsg->ikm_header->msgh_size;
+                         receiver->ith_state = MACH_RCV_TOO_LARGE;
+
+                         if ((receiver->ith_option & MACH_RCV_LARGE) != 0) {
+                                 receiver->ith_kmsg = IKM_NULL;
+                                 receiver->ith_seqno = 0;
+                                 thread_unlock(receiver);
+                                 HOT(c_mmot_bad_rcvr++);
+                                 goto get_next_receiver;
+                         }
+                 } else {
+                         receiver->ith_state = MACH_MSG_SUCCESS;
                  }
 
-                 /* At this point we are committed to do the "handoff". */
+                 /* At this point we are committed to do the message handoff. */
                  c_mach_msg_trap_switch_fast++;
-                 
-                 /*
-                  * JMM - Go ahead and pull the receiver from the runq.  If the
-                  * runq wasn't the one for the mqueue, unlock it.
-                  */
-                 wait_queue_pull_thread_locked(waitq,
-                                                               receiver,
-                                                               (waitq != &dest_mqueue->imq_wait_queue));
 
                  /*
                   *    Store the kmsg and seqno where the receiver can pick it up.
+                  *    and set it running.
                   */
-                 receiver->ith_state = MACH_MSG_SUCCESS;
                  receiver->ith_kmsg = kmsg;
                  receiver->ith_seqno = dest_mqueue->imq_seqno++;
-
-                 /*
-                  * Inline thread_go_locked
-                  *
-                  * JMM - Including hacked in version of setrun scheduler op
-                  * that doesn't try to put thread on a runq.
-                  */
-                 {
-                         receiver->state &= ~(TH_WAIT|TH_UNINT);
-                         hw_atomic_add(&receiver->processor_set->run_count, 1);
-                         receiver->state |= TH_RUN;
-                         receiver->wait_result = THREAD_AWAKENED;
-
-                         receiver->computation_metered = 0;
-                         receiver->reason = AST_NONE;
-                 }
-                 
                  thread_unlock(receiver);
-#if    THREAD_SWAPPER
-                 act_unlock(rcv_act);
-#endif /* THREAD_SWAPPER */
 
                  imq_unlock(dest_mqueue);
                  ip_unlock(dest_port);
                  current_task()->messages_sent++;
+                
+                 /*
+                  * Now prepare to wait on our receive queue.  But we have to make
+                  * sure the queue doesn't already have messages. If it does, we'll
+                  * have to do a slow receive.
+                  *
+                  * JMM - Need to make this check appropriate for portsets as
+                  * well before re-enabling them.
+                  */
+                 imq_lock(rcv_mqueue);
 
+                 if (ipc_kmsg_queue_first(&rcv_mqueue->imq_messages) != IKM_NULL) {
+                       imq_unlock(rcv_mqueue);
+                       splx(s);
+                       HOT(c_mmot_cold_033++);
+                       goto slow_receive;
+                 }
 
                  /*
                   *    Put self on receive port's queue.
@@ -1234,7 +1250,8 @@ mach_msg_overwrite_trap(
                   *    our reply message needs to determine if it
                   *    can hand off directly back to us.
                   */
-                 self->ith_msg = (rcv_msg) ? rcv_msg : msg;
+                 thread_lock(self);
+                 self->ith_msg_addr = (rcv_msg_addr) ? rcv_msg_addr : msg_addr;
                  self->ith_object = rcv_object; /* still holds reference */
                  self->ith_msize = rcv_size;
                  self->ith_option = option;
@@ -1244,15 +1261,12 @@ mach_msg_overwrite_trap(
                  waitq = &rcv_mqueue->imq_wait_queue;
                  (void)wait_queue_assert_wait64_locked(waitq,
                                                                                IPC_MQUEUE_RECEIVE,
-                                                                               THREAD_ABORTSAFE,
-                                                                               TRUE); /* unlock? */
-                 /* rcv_mqueue is unlocked */
-
-                 /*
-                  * Switch directly to receiving thread, and block
-                  * this thread as though it had called ipc_mqueue_receive.
-                  */
-                 thread_run(self, ipc_mqueue_receive_continue, receiver);
+                                                                               THREAD_ABORTSAFE, 0,
+                                                                               self);
+                 thread_unlock(self);
+                 imq_unlock(rcv_mqueue);
+                 splx(s);
+                 thread_block(ipc_mqueue_receive_continue);
                  /* NOTREACHED */
                }
 
@@ -1464,8 +1478,10 @@ mach_msg_overwrite_trap(
                        mr = ipc_kmsg_copyout_body(kmsg, space,
                                                   current_map(), 
                                                   MACH_MSG_BODY_NULL);
+                       /* hdr and send_size may be invalid now - done use */
                        if (mr != MACH_MSG_SUCCESS) {
-                               if (ipc_kmsg_put(msg, kmsg, hdr->msgh_size +
+                               if (ipc_kmsg_put(msg_addr, kmsg, 
+                                              kmsg->ikm_header->msgh_size +
                                               trailer->msgh_trailer_size) == 
                                                        MACH_RCV_INVALID_DATA)
                                        return MACH_RCV_INVALID_DATA;
@@ -1483,9 +1499,10 @@ mach_msg_overwrite_trap(
                /*NOTREACHED*/
 
            fast_put:
-               mr = ipc_kmsg_put(rcv_msg ? rcv_msg : msg,
+               mr = ipc_kmsg_put(rcv_msg_addr ? rcv_msg_addr : msg_addr,
                                  kmsg,
-                                 hdr->msgh_size + trailer->msgh_trailer_size);
+                                 kmsg->ikm_header->msgh_size + 
+                                 trailer->msgh_trailer_size);
                if (mr != MACH_MSG_SUCCESS) {
                        return MACH_RCV_INVALID_DATA;
                }
@@ -1502,10 +1519,6 @@ mach_msg_overwrite_trap(
 
            slow_copyin:
            {
-               ipc_kmsg_t temp_kmsg;
-               mach_port_seqno_t temp_seqno;
-               ipc_object_t temp_rcv_object;
-               ipc_mqueue_t temp_rcv_mqueue;
                register mach_port_name_t reply_name =
                                (mach_port_name_t)hdr->msgh_local_port;
 
@@ -1523,8 +1536,15 @@ mach_msg_overwrite_trap(
                        return(mr);
                }
 
-               /* try to get back on optimized path */
+               /* 
+                *      LP64support - We have to recompute the header pointer
+                *      and send_size - as they could have changed during the
+                *      complex copyin.
+                */
+               hdr = kmsg->ikm_header;
+               send_size = hdr->msgh_size;
 
+               /* try to get back on optimized path */
                if ((reply_name != rcv_name) ||
                        (hdr->msgh_bits & MACH_MSGH_BITS_CIRCULAR)) {
                        HOT(c_mmot_cold_048++);
@@ -1591,7 +1611,6 @@ mach_msg_overwrite_trap(
 
            {
                register ipc_port_t     reply_port;
-               mach_port_seqno_t       local_seqno;
                spl_t s;
 
                /*
@@ -1621,7 +1640,7 @@ mach_msg_overwrite_trap(
                 * we cannot directly receive the reply
                 * message.
                 */
-               hdr = &kmsg->ikm_header;
+               hdr = kmsg->ikm_header;
                send_size = hdr->msgh_size;
                trailer = (mach_msg_format_0_trailer_t *) ((vm_offset_t) hdr +
                        round_msg(send_size));
@@ -1668,7 +1687,7 @@ mach_msg_overwrite_trap(
                 * no threads blocked waiting to send.
                 */
                dest_port = reply_port;
-               local_seqno = rcv_mqueue->imq_seqno++;
+               temp_seqno = rcv_mqueue->imq_seqno++;
                imq_unlock(rcv_mqueue);
                splx(s);
 
@@ -1680,7 +1699,7 @@ mach_msg_overwrite_trap(
                ip_check_unlock(reply_port);
 
                if (option & MACH_RCV_TRAILER_MASK) {
-                       trailer->msgh_seqno = local_seqno;      
+                       trailer->msgh_seqno = temp_seqno;       
                        trailer->msgh_trailer_size = REQUESTED_TRAILER_SIZE(option);
                }
                /* copy out the kernel reply */
@@ -1701,7 +1720,8 @@ mach_msg_overwrite_trap(
                                                      current_map(),
                                                      MACH_MSG_BODY_NULL);
 
-                       (void) ipc_kmsg_put(msg, kmsg, hdr->msgh_size);
+                       (void) ipc_kmsg_put(msg_addr, kmsg, 
+                                           kmsg->ikm_header->msgh_size);
                        return(mr);
                }
 
@@ -1710,21 +1730,20 @@ mach_msg_overwrite_trap(
                 * We have sent the message.  Copy in the receive port.
                 */
                mr = ipc_mqueue_copyin(space, rcv_name,
-                                      &temp_rcv_mqueue, &temp_rcv_object);
+                                      &rcv_mqueue, &rcv_object);
                if (mr != MACH_MSG_SUCCESS) {
                        return(mr);
                }
-               rcv_mqueue = temp_rcv_mqueue;
-               rcv_object = temp_rcv_object;
                /* hold ref for rcv_object */
 
-           slow_receive:
                /*
+                *
                 *      Now we have sent the request and copied in rcv_name,
                 *      and hold ref for rcv_object (to keep mqueue alive).
                 *  Just receive a reply and try to get back to fast path.
                 */
 
+               slow_receive:
                self->ith_continuation = (void (*)(mach_msg_return_t))0;
                ipc_mqueue_receive(rcv_mqueue,
                                   MACH_MSG_OPTION_NONE,
@@ -1733,27 +1752,26 @@ mach_msg_overwrite_trap(
                                   THREAD_ABORTSAFE);
 
                mr = self->ith_state;
-               temp_kmsg = self->ith_kmsg;
                temp_seqno = self->ith_seqno;
 
                ipc_object_release(rcv_object);
 
-                 if (mr != MACH_MSG_SUCCESS) {
+               if (mr != MACH_MSG_SUCCESS) {
                    return(mr);
-                 }
+               }
 
-                 kmsg = temp_kmsg;
-                 hdr = &kmsg->ikm_header;
-                 send_size = hdr->msgh_size;
-                 trailer = (mach_msg_format_0_trailer_t *) ((vm_offset_t) hdr +
-                                                            round_msg(send_size));
-                 if (option & MACH_RCV_TRAILER_MASK) {
+               kmsg = self->ith_kmsg;
+               hdr = kmsg->ikm_header;
+               send_size = hdr->msgh_size;
+               trailer = (mach_msg_format_0_trailer_t *) ((vm_offset_t) hdr +
+                                                                                                  round_msg(send_size));
+               if (option & MACH_RCV_TRAILER_MASK) {
                    trailer->msgh_seqno = temp_seqno;   
                    trailer->msgh_trailer_size = REQUESTED_TRAILER_SIZE(option);
-                 }
-                 dest_port = (ipc_port_t) hdr->msgh_remote_port;
-                 HOT(c_mmot_cold_055++);
-                 goto fast_copyout;
+               }
+               dest_port = (ipc_port_t) hdr->msgh_remote_port;
+               HOT(c_mmot_cold_055++);
+               goto fast_copyout;
 
            slow_copyout:
                /*
@@ -1763,9 +1781,12 @@ mach_msg_overwrite_trap(
                 *      ipc_kmsg_copyout/ipc_kmsg_put.
                 */
 
-               reply_size = send_size + trailer->msgh_trailer_size;
+               /* LP64support - have to compute real size as it would be received */
+               reply_size = ipc_kmsg_copyout_size(kmsg, current_map()) +
+                            REQUESTED_TRAILER_SIZE(option);
+               temp_seqno = trailer->msgh_seqno;
                if (rcv_size < reply_size) {
-                       if (msg_receive_error(kmsg, msg, option, temp_seqno,
+                       if (msg_receive_error(kmsg, msg_addr, option, temp_seqno,
                                        space) == MACH_RCV_INVALID_DATA) {
                                mr = MACH_RCV_INVALID_DATA;
                                return(mr);
@@ -1780,12 +1801,12 @@ mach_msg_overwrite_trap(
                                      MACH_PORT_NULL, MACH_MSG_BODY_NULL);
                if (mr != MACH_MSG_SUCCESS) {
                        if ((mr &~ MACH_MSG_MASK) == MACH_RCV_BODY_ERROR) {
-                               if (ipc_kmsg_put(msg, kmsg, reply_size) == 
+                               if (ipc_kmsg_put(msg_addr, kmsg, reply_size) == 
                                                        MACH_RCV_INVALID_DATA)
                                        mr = MACH_RCV_INVALID_DATA;
                        } 
                        else {
-                               if (msg_receive_error(kmsg, msg, option,
+                               if (msg_receive_error(kmsg, msg_addr, option,
                                    temp_seqno, space) == MACH_RCV_INVALID_DATA)
                                        mr = MACH_RCV_INVALID_DATA;
                        }
@@ -1803,15 +1824,48 @@ mach_msg_overwrite_trap(
 #endif /* ENABLE_HOTPATH */
 
        if (option & MACH_SEND_MSG) {
-               mr = mach_msg_send(msg, option, send_size,
-                                  timeout, notify);
+               ipc_space_t space = current_space();
+               vm_map_t map = current_map();
+               ipc_kmsg_t kmsg;
+
+               mr = ipc_kmsg_get(msg_addr, send_size, &kmsg);
+
+               if (mr != MACH_MSG_SUCCESS)
+                       return mr;
+
+               if (option & MACH_SEND_CANCEL) {
+                       if (notify == MACH_PORT_NULL)
+                               mr = MACH_SEND_INVALID_NOTIFY;
+                       else
+                               mr = ipc_kmsg_copyin(kmsg, space, map, notify);
+               } else
+                       mr = ipc_kmsg_copyin(kmsg, space, map, MACH_PORT_NULL);
+               if (mr != MACH_MSG_SUCCESS) {
+                       ipc_kmsg_free(kmsg);
+                       return mr;
+               }
+
+               mr = ipc_kmsg_send(kmsg, option & MACH_SEND_TIMEOUT, msg_timeout);
+
                if (mr != MACH_MSG_SUCCESS) {
+                       mr |= ipc_kmsg_copyout_pseudo(kmsg, space, map, MACH_MSG_BODY_NULL);
+                       (void) ipc_kmsg_put(msg_addr, kmsg, kmsg->ikm_header->msgh_size);
                        return mr;
                }
+
        }
 
        if (option & MACH_RCV_MSG) {
-               mach_msg_header_t *rcv;
+               thread_t self = current_thread();
+               ipc_space_t space = current_space();
+               ipc_object_t object;
+               ipc_mqueue_t mqueue;
+
+               mr = ipc_mqueue_copyin(space, rcv_name, &mqueue, &object);
+               if (mr != MACH_MSG_SUCCESS) {
+                       return mr;
+               }
+               /* hold ref for object */
 
                /*
                 * 1. MACH_RCV_OVERWRITE is on, and rcv_msg is our scatter list
@@ -1820,14 +1874,21 @@ mach_msg_overwrite_trap(
                 *    alternate receive buffer (separate send and receive buffers).
                 */
                if (option & MACH_RCV_OVERWRITE) 
-                   rcv = rcv_msg;
-               else if (rcv_msg != MACH_MSG_NULL)
-                   rcv = rcv_msg;
+                       self->ith_msg_addr = rcv_msg_addr;
+               else if (rcv_msg_addr != (mach_vm_address_t)0)
+                       self->ith_msg_addr = rcv_msg_addr;
                else
-                   rcv = msg;
-               mr = mach_msg_receive(rcv, option, rcv_size, rcv_name, 
-                                     timeout, thread_syscall_return, scatter_list_size);
-               thread_syscall_return(mr);
+                       self->ith_msg_addr = msg_addr;
+               self->ith_object = object;
+               self->ith_msize = rcv_size;
+               self->ith_option = option;
+               self->ith_scatter_list_size = scatter_list_size;
+               self->ith_continuation = thread_syscall_return;
+
+               ipc_mqueue_receive(mqueue, option, rcv_size, msg_timeout, THREAD_ABORTSAFE);
+               if ((option & MACH_RCV_TIMEOUT) && msg_timeout == 0)
+                       thread_poll_yield(self);
+               return mach_msg_receive_results();
        }
 
        return MACH_MSG_SUCCESS;
@@ -1845,23 +1906,13 @@ mach_msg_overwrite_trap(
 
 mach_msg_return_t
 mach_msg_trap(
-       mach_msg_header_t       *msg,
-       mach_msg_option_t       option,
-       mach_msg_size_t         send_size,
-       mach_msg_size_t         rcv_size,
-       mach_port_name_t        rcv_name,
-       mach_msg_timeout_t      timeout,
-       mach_port_name_t        notify)
+       struct mach_msg_overwrite_trap_args *args)
 {
-  return mach_msg_overwrite_trap(msg,
-                                option,
-                                send_size,
-                                rcv_size,
-                                rcv_name,
-                                timeout,
-                                notify,
-                                (mach_msg_header_t *)0,
-                                (mach_msg_size_t)0);
+       kern_return_t kr;
+       args->rcv_msg = (mach_vm_address_t)0;
+
+       kr = mach_msg_overwrite_trap(args);
+       return kr;
 }
  
 
@@ -1881,7 +1932,7 @@ mach_msg_trap(
 mach_msg_return_t
 msg_receive_error(
        ipc_kmsg_t              kmsg,
-       mach_msg_header_t       *msg,
+       mach_vm_address_t       msg_addr,
        mach_msg_option_t       option,
        mach_port_seqno_t       seqno,
        ipc_space_t             space)
@@ -1898,9 +1949,9 @@ msg_receive_error(
         * Build a minimal message with the requested trailer.
         */
        trailer = (mach_msg_format_0_trailer_t *) 
-                       ((vm_offset_t)&kmsg->ikm_header +
+                       ((vm_offset_t)kmsg->ikm_header +
                        round_msg(sizeof(mach_msg_header_t)));
-       kmsg->ikm_header.msgh_size = sizeof(mach_msg_header_t);
+       kmsg->ikm_header->msgh_size = sizeof(mach_msg_header_t);
        bcopy(  (char *)&trailer_template, 
                (char *)trailer, 
                sizeof(trailer_template));
@@ -1912,7 +1963,7 @@ msg_receive_error(
        /*
         * Copy the message to user space
         */
-       if (ipc_kmsg_put(msg, kmsg, kmsg->ikm_header.msgh_size +
+       if (ipc_kmsg_put(msg_addr, kmsg, kmsg->ikm_header->msgh_size +
                        trailer->msgh_trailer_size) == MACH_RCV_INVALID_DATA)
                return(MACH_RCV_INVALID_DATA);
        else