+ * ipc_kmsg_print64 [ debug ]
+ */
+void
+ipc_kmsg_print64(
+ ipc_kmsg_t kmsg,
+ const char *str)
+{
+ kprintf("%s kmsg=%p:\n", str, kmsg);
+ kprintf(" next=%p, prev=%p, size=%d",
+ kmsg->ikm_next,
+ kmsg->ikm_prev,
+ kmsg->ikm_size);
+ kprintf("\n");
+ ipc_msg_print64(kmsg->ikm_header);
+}
+
+const char *
+msgh_bit_decode64(
+ mach_msg_bits_t bit)
+{
+ switch (bit) {
+ case MACH_MSGH_BITS_COMPLEX: return "complex";
+ case MACH_MSGH_BITS_CIRCULAR: return "circular";
+ default: return NULL;
+ }
+}
+
+/*
+ * ipc_msg_print64 [ debug ]
+ */
+void
+ipc_msg_print64(
+ mach_msg_header_t *msgh)
+{
+ mach_msg_bits_t mbits;
+ unsigned int bit, i;
+ const char *bit_name;
+ int needs_comma;
+
+ mbits = msgh->msgh_bits;
+ kprintf(" msgh_bits=0x%x: l=0x%x,r=0x%x\n",
+ mbits,
+ MACH_MSGH_BITS_LOCAL(msgh->msgh_bits),
+ MACH_MSGH_BITS_REMOTE(msgh->msgh_bits));
+
+ mbits = MACH_MSGH_BITS_OTHER(mbits) & MACH_MSGH_BITS_USED;
+ kprintf(" decoded bits: ");
+ needs_comma = 0;
+ for (i = 0, bit = 1; i < sizeof(mbits) * 8; ++i, bit <<= 1) {
+ if ((mbits & bit) == 0) {
+ continue;
+ }
+ bit_name = msgh_bit_decode64((mach_msg_bits_t)bit);
+ if (bit_name) {
+ kprintf("%s%s", needs_comma ? "," : "", bit_name);
+ } else {
+ kprintf("%sunknown(0x%x),", needs_comma ? "," : "", bit);
+ }
+ ++needs_comma;
+ }
+ if (msgh->msgh_bits & ~MACH_MSGH_BITS_USED) {
+ kprintf("%sunused=0x%x,", needs_comma ? "," : "",
+ msgh->msgh_bits & ~MACH_MSGH_BITS_USED);
+ }
+ kprintf("\n");
+
+ needs_comma = 1;
+ if (msgh->msgh_remote_port) {
+ kprintf(" remote=%p(", msgh->msgh_remote_port);
+ ipc_print_type_name64(MACH_MSGH_BITS_REMOTE(msgh->msgh_bits));
+ kprintf(")");
+ } else {
+ kprintf(" remote=null");
+ }
+
+ if (msgh->msgh_local_port) {
+ kprintf("%slocal=%p(", needs_comma ? "," : "",
+ msgh->msgh_local_port);
+ ipc_print_type_name64(MACH_MSGH_BITS_LOCAL(msgh->msgh_bits));
+ kprintf(")\n");
+ } else {
+ kprintf("local=null\n");
+ }
+
+ kprintf(" msgh_id=%d, size=%d\n",
+ msgh->msgh_id,
+ msgh->msgh_size);
+
+ if (mbits & MACH_MSGH_BITS_COMPLEX) {
+ ipc_msg_print_untyped64((mach_msg_body_t *) (msgh + 1));
+ }
+
+ ipc_msg_body_print64((void *)(msgh + 1), msgh->msgh_size);
+}
+
+
+const char *
+mm_copy_options_string64(
+ mach_msg_copy_options_t option)
+{
+ const char *name;
+
+ switch (option) {
+ case MACH_MSG_PHYSICAL_COPY:
+ name = "PHYSICAL";
+ break;
+ case MACH_MSG_VIRTUAL_COPY:
+ name = "VIRTUAL";
+ break;
+ case MACH_MSG_OVERWRITE:
+ name = "OVERWRITE(DEPRECATED)";
+ break;
+ case MACH_MSG_ALLOCATE:
+ name = "ALLOCATE";
+ break;
+ case MACH_MSG_KALLOC_COPY_T:
+ name = "KALLOC_COPY_T";
+ break;
+ default:
+ name = "unknown";
+ break;
+ }
+ return name;
+}
+
+void
+ipc_msg_print_untyped64(
+ mach_msg_body_t *body)
+{
+ mach_msg_descriptor_t *saddr, *send;
+ mach_msg_descriptor_type_t type;
+
+ kprintf(" %d descriptors: \n", body->msgh_descriptor_count);
+
+ saddr = (mach_msg_descriptor_t *) (body + 1);
+ send = saddr + body->msgh_descriptor_count;
+
+ for (; saddr < send; saddr++) {
+ type = saddr->type.type;
+
+ switch (type) {
+ case MACH_MSG_PORT_DESCRIPTOR: {
+ mach_msg_port_descriptor_t *dsc;
+
+ dsc = &saddr->port;
+ kprintf(" PORT name = %p disp = ", dsc->name);
+ ipc_print_type_name64(dsc->disposition);
+ kprintf("\n");
+ break;
+ }
+ case MACH_MSG_OOL_VOLATILE_DESCRIPTOR:
+ case MACH_MSG_OOL_DESCRIPTOR: {
+ mach_msg_ool_descriptor_t *dsc;
+
+ dsc = (mach_msg_ool_descriptor_t *) &saddr->out_of_line;
+ kprintf(" OOL%s addr = %p size = 0x%x copy = %s %s\n",
+ type == MACH_MSG_OOL_DESCRIPTOR ? "" : " VOLATILE",
+ dsc->address, dsc->size,
+ mm_copy_options_string64(dsc->copy),
+ dsc->deallocate ? "DEALLOC" : "");
+ break;
+ }
+ case MACH_MSG_OOL_PORTS_DESCRIPTOR: {
+ mach_msg_ool_ports_descriptor_t *dsc;
+
+ dsc = (mach_msg_ool_ports_descriptor_t *) &saddr->ool_ports;
+
+ kprintf(" OOL_PORTS addr = %p count = 0x%x ",
+ dsc->address, dsc->count);
+ kprintf("disp = ");
+ ipc_print_type_name64(dsc->disposition);
+ kprintf(" copy = %s %s\n",
+ mm_copy_options_string64(dsc->copy),
+ dsc->deallocate ? "DEALLOC" : "");
+ break;
+ }
+ case MACH_MSG_GUARDED_PORT_DESCRIPTOR: {
+ mach_msg_guarded_port_descriptor_t *dsc;
+
+ dsc = (mach_msg_guarded_port_descriptor_t *)&saddr->guarded_port;
+ kprintf(" GUARDED_PORT name = %p flags = 0x%x disp = ", dsc->name, dsc->flags);
+ ipc_print_type_name64(dsc->disposition);
+ kprintf("\n");
+ break;
+ }
+ default: {
+ kprintf(" UNKNOWN DESCRIPTOR 0x%x\n", type);
+ break;
+ }
+ }
+ }
+}
+
+#define DEBUG_IPC_KMSG_PRINT(kmsg, string) \
+ __unreachable_ok_push \
+ if (DEBUG_KPRINT_SYSCALL_PREDICATE(DEBUG_KPRINT_SYSCALL_IPC_MASK)) { \
+ ipc_kmsg_print64(kmsg, string); \
+ } \
+ __unreachable_ok_pop
+
+#define DEBUG_IPC_MSG_BODY_PRINT(body, size) \
+ __unreachable_ok_push \
+ if (DEBUG_KPRINT_SYSCALL_PREDICATE(DEBUG_KPRINT_SYSCALL_IPC_MASK)) { \
+ ipc_msg_body_print64(body, size); \
+ } \
+ __unreachable_ok_pop
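+
+/*
+ * Typical use (sketch): callers wrap their dump calls in these macros so
+ * the kprintf output is emitted only when IPC syscall debugging is
+ * enabled, e.g.
+ *
+ *	DEBUG_IPC_KMSG_PRINT(kmsg, "mach_msg()");
+ */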
+#else /* !DEBUG_MSGS_K64 */
+#define DEBUG_IPC_KMSG_PRINT(kmsg, string)
+#define DEBUG_IPC_MSG_BODY_PRINT(body, size)
+#endif /* !DEBUG_MSGS_K64 */
+
+extern vm_map_t ipc_kernel_copy_map;
+extern vm_size_t ipc_kmsg_max_space;
+extern const vm_size_t ipc_kmsg_max_vm_space;
+extern const vm_size_t ipc_kmsg_max_body_space;
+extern vm_size_t msg_ool_size_small;
+
+#define MSG_OOL_SIZE_SMALL msg_ool_size_small
+
+#if defined(__LP64__)
+#define MAP_SIZE_DIFFERS(map) (map->max_offset < MACH_VM_MAX_ADDRESS)
+#define OTHER_OOL_DESCRIPTOR mach_msg_ool_descriptor32_t
+#define OTHER_OOL_PORTS_DESCRIPTOR mach_msg_ool_ports_descriptor32_t
+#else
+#define MAP_SIZE_DIFFERS(map) (map->max_offset > VM_MAX_ADDRESS)
+#define OTHER_OOL_DESCRIPTOR mach_msg_ool_descriptor64_t
+#define OTHER_OOL_PORTS_DESCRIPTOR mach_msg_ool_ports_descriptor64_t
+#endif
+
+#define DESC_SIZE_ADJUSTMENT ((mach_msg_size_t)(sizeof(mach_msg_ool_descriptor64_t) - \
+ sizeof(mach_msg_ool_descriptor32_t)))
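+
+/*
+ * Worked numbers (assuming the usual descriptor layouts, where
+ * mach_msg_ool_descriptor64_t is 16 bytes and the 32-bit variant is 12):
+ * DESC_SIZE_ADJUSTMENT comes to 4 bytes per descriptor, the growth when a
+ * 32-bit user's OOL descriptors are expanded to the kernel's 64-bit
+ * layout. MAP_SIZE_DIFFERS(map) above identifies the maps that need this
+ * translation.
+ */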
+
+/* scatter list macros */
+
+#define SKIP_PORT_DESCRIPTORS(s, c) \
+MACRO_BEGIN \
+ if ((s) != MACH_MSG_DESCRIPTOR_NULL) { \
+ while ((c) > 0) { \
+ if ((s)->type.type != MACH_MSG_PORT_DESCRIPTOR) \
+ break; \
+ (s)++; (c)--; \
+ } \
+ if (c == 0) \
+ (s) = MACH_MSG_DESCRIPTOR_NULL; \
+ } \
+MACRO_END
+
+#define INCREMENT_SCATTER(s, c, d) \
+MACRO_BEGIN \
+ if ((s) != MACH_MSG_DESCRIPTOR_NULL) { \
+ s = (d) ? (mach_msg_descriptor_t *) \
+ ((OTHER_OOL_DESCRIPTOR *)(s) + 1) : \
+ (s + 1); \
+ (c)--; \
+ } \
+MACRO_END
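+
+/*
+ * Sketch of how the scatter-list macros above are meant to be used when
+ * walking user-supplied scatter entries alongside the kernel descriptors
+ * (`sstart`, `scount`, and `differs` are illustrative names, not actual
+ * locals):
+ *
+ *	SKIP_PORT_DESCRIPTORS(sstart, scount);
+ *	for (each OOL descriptor in the message) {
+ *		if (sstart != MACH_MSG_DESCRIPTOR_NULL) {
+ *			... take copy/address hints from *sstart ...
+ *			INCREMENT_SCATTER(sstart, scount, differs);
+ *			SKIP_PORT_DESCRIPTORS(sstart, scount);
+ *		}
+ *	}
+ */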
+
+#define KMSG_TRACE_FLAG_TRACED 0x000001
+#define KMSG_TRACE_FLAG_COMPLEX 0x000002
+#define KMSG_TRACE_FLAG_OOLMEM 0x000004
+#define KMSG_TRACE_FLAG_VCPY 0x000008
+#define KMSG_TRACE_FLAG_PCPY 0x000010
+#define KMSG_TRACE_FLAG_SND64 0x000020
+#define KMSG_TRACE_FLAG_RAISEIMP 0x000040
+#define KMSG_TRACE_FLAG_APP_SRC 0x000080
+#define KMSG_TRACE_FLAG_APP_DST 0x000100
+#define KMSG_TRACE_FLAG_DAEMON_SRC 0x000200
+#define KMSG_TRACE_FLAG_DAEMON_DST 0x000400
+#define KMSG_TRACE_FLAG_DST_NDFLTQ 0x000800
+#define KMSG_TRACE_FLAG_SRC_NDFLTQ 0x001000
+#define KMSG_TRACE_FLAG_DST_SONCE 0x002000
+#define KMSG_TRACE_FLAG_SRC_SONCE 0x004000
+#define KMSG_TRACE_FLAG_CHECKIN 0x008000
+#define KMSG_TRACE_FLAG_ONEWAY 0x010000
+#define KMSG_TRACE_FLAG_IOKIT 0x020000
+#define KMSG_TRACE_FLAG_SNDRCV 0x040000
+#define KMSG_TRACE_FLAG_DSTQFULL 0x080000
+#define KMSG_TRACE_FLAG_VOUCHER 0x100000
+#define KMSG_TRACE_FLAG_TIMER 0x200000
+#define KMSG_TRACE_FLAG_SEMA 0x400000
+#define KMSG_TRACE_FLAG_DTMPOWNER 0x800000
+#define KMSG_TRACE_FLAG_GUARDED_DESC 0x1000000
+
+#define KMSG_TRACE_FLAGS_MASK 0x1ffffff
+#define KMSG_TRACE_FLAGS_SHIFT 8
+
+#define KMSG_TRACE_PORTS_MASK 0xff
+#define KMSG_TRACE_PORTS_SHIFT 0
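+
+/*
+ * Decode sketch for the packed trace argument emitted by
+ * ipc_kmsg_trace_send() below (as a post-processing tool would unpack it):
+ *
+ *	flags = (arg4 >> KMSG_TRACE_FLAGS_SHIFT) & KMSG_TRACE_FLAGS_MASK;
+ *	ports = (arg4 >> KMSG_TRACE_PORTS_SHIFT) & KMSG_TRACE_PORTS_MASK;
+ */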
+
+#if (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD)
+#include <stdint.h>
+
+void
+ipc_kmsg_trace_send(ipc_kmsg_t kmsg,
+ mach_msg_option_t option)
+{
+ task_t send_task = TASK_NULL;
+ ipc_port_t dst_port, src_port;
+ boolean_t is_task_64bit;
+ mach_msg_header_t *msg;
+ mach_msg_trailer_t *trailer;
+
+ int kotype = 0;
+ uint32_t msg_size = 0;
+ uint64_t msg_flags = KMSG_TRACE_FLAG_TRACED;
+ uint32_t num_ports = 0;
+ uint32_t send_pid, dst_pid;
+
+ /*
+ * Check not only that ktracing is enabled, but that we will
+ * _actually_ emit the KMSG_INFO tracepoint. This saves us a
+ * significant amount of processing (and a port lock hold) in
+ * the non-tracing case.
+ */
+ if (__probable((kdebug_enable & KDEBUG_TRACE) == 0)) {
+ return;
+ }
+ if (!kdebug_debugid_enabled(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO))) {
+ return;
+ }
+
+ msg = kmsg->ikm_header;
+
+ dst_port = msg->msgh_remote_port;
+ if (!IPC_PORT_VALID(dst_port)) {
+ return;
+ }
+
+ /*
+ * Message properties / options
+ */
+ if ((option & (MACH_SEND_MSG | MACH_RCV_MSG)) == (MACH_SEND_MSG | MACH_RCV_MSG)) {
+ msg_flags |= KMSG_TRACE_FLAG_SNDRCV;
+ }
+
+ if (msg->msgh_id >= is_iokit_subsystem.start &&
+ msg->msgh_id < is_iokit_subsystem.end + 100) {
+ msg_flags |= KMSG_TRACE_FLAG_IOKIT;
+ }
+ /* magic XPC checkin message id (XPC_MESSAGE_ID_CHECKIN) from libxpc */
+ else if (msg->msgh_id == 0x77303074u /* w00t */) {
+ msg_flags |= KMSG_TRACE_FLAG_CHECKIN;
+ }
+
+ if (msg->msgh_bits & MACH_MSGH_BITS_RAISEIMP) {
+ msg_flags |= KMSG_TRACE_FLAG_RAISEIMP;
+ }
+
+ if (unsafe_convert_port_to_voucher(kmsg->ikm_voucher)) {
+ msg_flags |= KMSG_TRACE_FLAG_VOUCHER;
+ }
+
+ /*
+ * Sending task / port
+ */
+ send_task = current_task();
+ send_pid = task_pid(send_task);
+
+ if (send_pid != 0) {
+ if (task_is_daemon(send_task)) {
+ msg_flags |= KMSG_TRACE_FLAG_DAEMON_SRC;
+ } else if (task_is_app(send_task)) {
+ msg_flags |= KMSG_TRACE_FLAG_APP_SRC;
+ }
+ }
+
+ is_task_64bit = (send_task->map->max_offset > VM_MAX_ADDRESS);
+ if (is_task_64bit) {
+ msg_flags |= KMSG_TRACE_FLAG_SND64;
+ }
+
+ src_port = msg->msgh_local_port;
+ if (src_port) {
+ if (src_port->ip_messages.imq_qlimit != MACH_PORT_QLIMIT_DEFAULT) {
+ msg_flags |= KMSG_TRACE_FLAG_SRC_NDFLTQ;
+ }
+ switch (MACH_MSGH_BITS_LOCAL(msg->msgh_bits)) {
+ case MACH_MSG_TYPE_MOVE_SEND_ONCE:
+ msg_flags |= KMSG_TRACE_FLAG_SRC_SONCE;
+ break;
+ default:
+ break;
+ }
+ } else {
+ msg_flags |= KMSG_TRACE_FLAG_ONEWAY;
+ }
+
+
+ /*
+ * Destination task / port
+ */
+ ip_lock(dst_port);
+ if (!ip_active(dst_port)) {
+ /* dst port is being torn down */
+ dst_pid = (uint32_t)0xfffffff0;
+ } else if (dst_port->ip_tempowner) {
+ msg_flags |= KMSG_TRACE_FLAG_DTMPOWNER;
+ if (IIT_NULL != dst_port->ip_imp_task) {
+ dst_pid = task_pid(dst_port->ip_imp_task->iit_task);
+ } else {
+ dst_pid = (uint32_t)0xfffffff1;
+ }
+ } else if (dst_port->ip_receiver_name == MACH_PORT_NULL) {
+ /* dst_port is otherwise in-transit */
+ dst_pid = (uint32_t)0xfffffff2;
+ } else {
+ if (dst_port->ip_receiver == ipc_space_kernel) {
+ dst_pid = 0;
+ } else {
+ ipc_space_t dst_space;
+ dst_space = dst_port->ip_receiver;
+ if (dst_space && is_active(dst_space)) {
+ dst_pid = task_pid(dst_space->is_task);
+ if (task_is_daemon(dst_space->is_task)) {
+ msg_flags |= KMSG_TRACE_FLAG_DAEMON_DST;
+ } else if (task_is_app(dst_space->is_task)) {
+ msg_flags |= KMSG_TRACE_FLAG_APP_DST;
+ }
+ } else {
+ /* receiving task is being torn down */
+ dst_pid = (uint32_t)0xfffffff3;
+ }
+ }
+ }
+
+ if (dst_port->ip_messages.imq_qlimit != MACH_PORT_QLIMIT_DEFAULT) {
+ msg_flags |= KMSG_TRACE_FLAG_DST_NDFLTQ;
+ }
+ if (imq_full(&dst_port->ip_messages)) {
+ msg_flags |= KMSG_TRACE_FLAG_DSTQFULL;
+ }
+
+ kotype = ip_kotype(dst_port);
+
+ ip_unlock(dst_port);
+
+ switch (kotype) {
+ case IKOT_SEMAPHORE:
+ msg_flags |= KMSG_TRACE_FLAG_SEMA;
+ break;
+ case IKOT_TIMER:
+ case IKOT_CLOCK:
+ msg_flags |= KMSG_TRACE_FLAG_TIMER;
+ break;
+ case IKOT_MASTER_DEVICE:
+ case IKOT_IOKIT_CONNECT:
+ case IKOT_IOKIT_OBJECT:
+ case IKOT_IOKIT_IDENT:
+ case IKOT_UEXT_OBJECT:
+ msg_flags |= KMSG_TRACE_FLAG_IOKIT;
+ break;
+ default:
+ break;
+ }
+
+ switch (MACH_MSGH_BITS_REMOTE(msg->msgh_bits)) {
+ case MACH_MSG_TYPE_PORT_SEND_ONCE:
+ msg_flags |= KMSG_TRACE_FLAG_DST_SONCE;
+ break;
+ default:
+ break;
+ }
+
+
+ /*
+ * Message size / content
+ */
+ msg_size = msg->msgh_size - sizeof(mach_msg_header_t);
+
+ if (msg->msgh_bits & MACH_MSGH_BITS_COMPLEX) {
+ mach_msg_body_t *msg_body;
+ mach_msg_descriptor_t *kern_dsc;
+ int dsc_count;
+
+ msg_flags |= KMSG_TRACE_FLAG_COMPLEX;
+
+ msg_body = (mach_msg_body_t *)(kmsg->ikm_header + 1);
+ dsc_count = (int)msg_body->msgh_descriptor_count;
+ kern_dsc = (mach_msg_descriptor_t *)(msg_body + 1);
+
+ /* this is gross: see ipc_kmsg_copyin_body()... */
+ if (!is_task_64bit) {
+ msg_size -= (dsc_count * 12);
+ }
+
+ for (int i = 0; i < dsc_count; i++) {
+ switch (kern_dsc[i].type.type) {
+ case MACH_MSG_PORT_DESCRIPTOR:
+ num_ports++;
+ if (is_task_64bit) {
+ msg_size -= 12;
+ }
+ break;
+ case MACH_MSG_OOL_VOLATILE_DESCRIPTOR:
+ case MACH_MSG_OOL_DESCRIPTOR: {
+ mach_msg_ool_descriptor_t *dsc;
+ dsc = (mach_msg_ool_descriptor_t *)&kern_dsc[i];
+ msg_flags |= KMSG_TRACE_FLAG_OOLMEM;
+ msg_size += dsc->size;
+ if ((dsc->size >= MSG_OOL_SIZE_SMALL) &&
+ (dsc->copy == MACH_MSG_PHYSICAL_COPY) &&
+ !dsc->deallocate) {
+ msg_flags |= KMSG_TRACE_FLAG_PCPY;
+ } else if (dsc->size <= MSG_OOL_SIZE_SMALL) {
+ msg_flags |= KMSG_TRACE_FLAG_PCPY;
+ } else {
+ msg_flags |= KMSG_TRACE_FLAG_VCPY;
+ }
+ if (is_task_64bit) {
+ msg_size -= 16;
+ }
+ } break;
+ case MACH_MSG_OOL_PORTS_DESCRIPTOR: {
+ mach_msg_ool_ports_descriptor_t *dsc;
+ dsc = (mach_msg_ool_ports_descriptor_t *)&kern_dsc[i];
+ num_ports += dsc->count;
+ if (is_task_64bit) {
+ msg_size -= 16;
+ }
+ } break;
+ case MACH_MSG_GUARDED_PORT_DESCRIPTOR:
+ num_ports++;
+ msg_flags |= KMSG_TRACE_FLAG_GUARDED_DESC;
+ if (is_task_64bit) {
+ msg_size -= 16;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ /*
+ * Trailer contents
+ */
+ trailer = (mach_msg_trailer_t *)((vm_offset_t)msg +
+ (vm_offset_t)mach_round_msg(msg->msgh_size));
+ if (trailer->msgh_trailer_size <= sizeof(mach_msg_security_trailer_t)) {
+ extern const security_token_t KERNEL_SECURITY_TOKEN;
+ mach_msg_security_trailer_t *strailer;
+ strailer = (mach_msg_security_trailer_t *)trailer;
+ /*
+ * verify the sender PID: replies from the kernel often look
+ * like self-talk because the sending port is not reset.
+ */
+ if (memcmp(&strailer->msgh_sender,
+ &KERNEL_SECURITY_TOKEN,
+ sizeof(KERNEL_SECURITY_TOKEN)) == 0) {
+ send_pid = 0;
+ msg_flags &= ~(KMSG_TRACE_FLAG_APP_SRC | KMSG_TRACE_FLAG_DAEMON_SRC);
+ }
+ }
+
+ KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO) | DBG_FUNC_END,
+ (uintptr_t)send_pid,
+ (uintptr_t)dst_pid,
+ (uintptr_t)msg_size,
+ (uintptr_t)(
+ ((msg_flags & KMSG_TRACE_FLAGS_MASK) << KMSG_TRACE_FLAGS_SHIFT) |
+ ((num_ports & KMSG_TRACE_PORTS_MASK) << KMSG_TRACE_PORTS_SHIFT)
+ )
+ );
+}
+#endif
+
+/* zone for cached ipc_kmsg_t structures */
+ZONE_DECLARE(ipc_kmsg_zone, "ipc kmsgs", IKM_SAVED_KMSG_SIZE,
+ ZC_CACHING | ZC_ZFREE_CLEARMEM);
+static TUNABLE(bool, enforce_strict_reply, "ipc_strict_reply", false);
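+/*
+ * enforce_strict_reply is controlled by the "ipc_strict_reply" boot-arg
+ * (e.g. booting with ipc_strict_reply=1 enables the strict reply-port
+ * semantics checks below).
+ */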
+
+/*
+ * Forward declarations
+ */
+
+void ipc_kmsg_clean(
+ ipc_kmsg_t kmsg);
+
+void ipc_kmsg_clean_body(
+ ipc_kmsg_t kmsg,
+ mach_msg_type_number_t number,
+ mach_msg_descriptor_t *desc);
+
+void ipc_kmsg_clean_partial(
+ ipc_kmsg_t kmsg,
+ mach_msg_type_number_t number,
+ mach_msg_descriptor_t *desc,
+ vm_offset_t paddr,
+ vm_size_t length);
+
+mach_msg_return_t ipc_kmsg_copyin_body(
+ ipc_kmsg_t kmsg,
+ ipc_space_t space,
+ vm_map_t map,
+ mach_msg_option_t *optionp);
+
+
+static void
+ipc_kmsg_link_reply_context_locked(
+ ipc_port_t reply_port,
+ ipc_port_t voucher_port);
+
+static kern_return_t
+ipc_kmsg_validate_reply_port_locked(
+ ipc_port_t reply_port,
+ mach_msg_option_t options);
+
+static mach_msg_return_t
+ipc_kmsg_validate_reply_context_locked(
+ mach_msg_option_t option,
+ ipc_port_t dest_port,
+ ipc_voucher_t voucher,
+ mach_port_name_t voucher_name);
+
+/* we can't include the BSD <sys/persona.h> header here... */
+#ifndef PERSONA_ID_NONE
+#define PERSONA_ID_NONE ((uint32_t)-1)
+#endif
+
+/*
+ * We keep a per-processor cache of kernel message buffers.
+ * The cache saves the overhead/locking of using kalloc/kfree.
+ * The per-processor cache seems to miss less than a per-thread cache,
+ * and it also uses less memory. Access to the cache doesn't
+ * require locking.
+ */
+
+/*
+ * Routine: ikm_set_header
+ * Purpose:
+ * Set the header (and data) pointers for a message. If the
+ * message is small, the data pointer is NULL and all the
+ * data resides within the fixed space of the kmsg itself;
+ * otherwise, the header is placed at the end of the
+ * separately allocated data buffer.
+ * Conditions:
+ * Nothing locked.
+ */
+static void
+ikm_set_header(
+ ipc_kmsg_t kmsg,
+ void *data,
+ mach_msg_size_t mtsize)
+{
+ if (data) {
+ kmsg->ikm_data = data;
+ kmsg->ikm_header = (mach_msg_header_t *)(data + kmsg->ikm_size - mtsize);
+ } else {
+ assert(kmsg->ikm_size == IKM_SAVED_MSG_SIZE);
+ kmsg->ikm_header = (mach_msg_header_t *)
+ ((vm_offset_t)(kmsg + 1) + kmsg->ikm_size - mtsize);
+ }
+}
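+
+/*
+ * Layout sketch: for an out-of-line buffer, the header is placed at the
+ * end of the buffer (data + ikm_size - mtsize), so the expansion slack
+ * reserved by ipc_kmsg_alloc() sits in front of the header:
+ *
+ *	[ expansion slack ...... | header + body + trailer ]
+ *	data                     ikm_header      data + ikm_size
+ */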
+
+/*
+ * Routine: ipc_kmsg_alloc
+ * Purpose:
+ * Allocate a kernel message structure. If we can get one from
+ * the cache, that is best. Otherwise, allocate a new one.
+ * Conditions:
+ * Nothing locked.
+ */
+ipc_kmsg_t
+ipc_kmsg_alloc(
+ mach_msg_size_t msg_and_trailer_size)
+{
+ mach_msg_size_t max_expanded_size;
+ ipc_kmsg_t kmsg;
+ void *data;
+
+ /*
+ * LP64support -
+ * Pad the allocation in case we need to expand the
+ * message descriptors for user spaces with pointers larger than
+ * the kernel's own, or vice versa. We don't know how many descriptors
+ * there are yet, so just assume the whole body could be
+ * descriptors (if there could be any at all).
+ *
+ * The expansion space is left in front of the header,
+ * because it is easier to pull the header and descriptors
+ * forward as we process them than it is to push all the
+ * data backwards.
+ */
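+ /*
+ * Worked example (sketch, with a 12-byte 32-bit OOL descriptor): a
+ * 120-byte body beyond the base message can hold at most 10
+ * descriptors, so max_desc below pads the allocation by
+ * 10 * DESC_SIZE_ADJUSTMENT = 40 bytes.
+ */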
+ mach_msg_size_t size = msg_and_trailer_size - MAX_TRAILER_SIZE;
+
+ /* compare against implementation upper limit for the body */
+ if (size > ipc_kmsg_max_body_space) {
+ return IKM_NULL;
+ }
+
+ if (size > sizeof(mach_msg_base_t)) {
+ mach_msg_size_t max_desc = (mach_msg_size_t)(((size - sizeof(mach_msg_base_t)) /
+ sizeof(mach_msg_ool_descriptor32_t)) *
+ DESC_SIZE_ADJUSTMENT);
+
+ /* make sure expansion won't cause wrap */
+ if (msg_and_trailer_size > MACH_MSG_SIZE_MAX - max_desc) {
+ return IKM_NULL;
+ }
+
+ max_expanded_size = msg_and_trailer_size + max_desc;
+ } else {
+ max_expanded_size = msg_and_trailer_size;
+ }
+
+ if (max_expanded_size > IKM_SAVED_MSG_SIZE) {
+ data = kheap_alloc(KHEAP_DATA_BUFFERS, max_expanded_size, Z_WAITOK);
+ if (data == NULL) {
+ return IKM_NULL;
+ }
+ } else {
+ data = NULL;
+ max_expanded_size = IKM_SAVED_MSG_SIZE;
+ }
+
+ kmsg = zalloc_flags(ipc_kmsg_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
+ kmsg->ikm_size = max_expanded_size;
+ ikm_qos_init(kmsg);
+ ikm_set_header(kmsg, data, msg_and_trailer_size);
+ assert((kmsg->ikm_prev = kmsg->ikm_next = IKM_BOGUS));
+
+ return kmsg;
+}
+
+/*
+ * Routine: ipc_kmsg_free
+ * Purpose:
+ * Free a kernel message buffer. If the kmsg is preallocated
+ * to a port, just "put it back" (marked unused). We have to
+ * do this with the port locked. The port may have its hold
+ * on our message released. In that case, we have to just
+ * revert the message to a traditional one and free it normally.
+ * Conditions:
+ * Nothing locked.
+ */
+
+void
+ipc_kmsg_free(
+ ipc_kmsg_t kmsg)
+{
+ mach_msg_size_t size = kmsg->ikm_size;
+ ipc_port_t port;
+
+ assert(!IP_VALID(kmsg->ikm_voucher));
+
+ KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_FREE) | DBG_FUNC_NONE,
+ VM_KERNEL_ADDRPERM((uintptr_t)kmsg),
+ 0, 0, 0, 0);
+
+ /*
+ * Check to see if the message is bound to the port. If so,
+ * mark it not in use. If the port isn't already dead, then
+ * leave the message associated with it. Otherwise, free it.
+ */
+ if (size == IKM_SAVED_MSG_SIZE) {
+ if ((void *)kmsg->ikm_header < (void *)(kmsg + 1) ||
+ (void *)kmsg->ikm_header >= (void *)(kmsg + 1) + IKM_SAVED_MSG_SIZE) {
+ panic("ipc_kmsg_free");
+ }
+ port = ikm_prealloc_inuse_port(kmsg);
+ if (port != IP_NULL) {
+ ip_lock(port);
+ ikm_prealloc_clear_inuse(kmsg, port);
+ if (ip_active(port) && (port->ip_premsg == kmsg)) {
+ assert(IP_PREALLOC(port));
+ ip_unlock(port);
+ ip_release(port);
+ return;
+ }
+ ip_unlock(port);
+ ip_release(port); /* May be last reference */
+ }
+ } else {
+ void *data = kmsg->ikm_data;
+ if ((void *)kmsg->ikm_header < data ||
+ (void *)kmsg->ikm_header >= data + size) {
+ panic("ipc_kmsg_free");
+ }
+ kheap_free(KHEAP_DATA_BUFFERS, data, size);
+ }
+ zfree(ipc_kmsg_zone, kmsg);
+}
+
+
+/*
+ * Routine: ipc_kmsg_enqueue
+ * Purpose:
+ * Enqueue a kmsg.
+ */
+
+void
+ipc_kmsg_enqueue(
+ ipc_kmsg_queue_t queue,
+ ipc_kmsg_t kmsg)
+{
+ ipc_kmsg_t first = queue->ikmq_base;
+ ipc_kmsg_t last;
+
+ if (first == IKM_NULL) {
+ queue->ikmq_base = kmsg;
+ kmsg->ikm_next = kmsg;
+ kmsg->ikm_prev = kmsg;
+ } else {
+ last = first->ikm_prev;
+ kmsg->ikm_next = first;
+ kmsg->ikm_prev = last;
+ first->ikm_prev = kmsg;
+ last->ikm_next = kmsg;
+ }
+}
+
+/*
+ * Routine: ipc_kmsg_enqueue_qos
+ * Purpose:
+ * Enqueue a kmsg, propagating qos
+ * overrides towards the head of the queue.
+ *
+ * Returns:
+ * whether the head of the queue had
+ * its override-qos adjusted because
+ * of this insertion.
+ */
+
+boolean_t
+ipc_kmsg_enqueue_qos(
+ ipc_kmsg_queue_t queue,
+ ipc_kmsg_t kmsg)
+{
+ ipc_kmsg_t first = queue->ikmq_base;
+ ipc_kmsg_t prev;
+ mach_msg_qos_t qos_ovr;
+
+ if (first == IKM_NULL) {
+ /* insert a first message */
+ queue->ikmq_base = kmsg;
+ kmsg->ikm_next = kmsg;
+ kmsg->ikm_prev = kmsg;
+ return TRUE;
+ }
+
+ /* insert at the tail */
+ prev = first->ikm_prev;
+ kmsg->ikm_next = first;
+ kmsg->ikm_prev = prev;
+ first->ikm_prev = kmsg;
+ prev->ikm_next = kmsg;
+
+ /* apply QoS overrides towards the head */
+ qos_ovr = kmsg->ikm_qos_override;
+ while (prev != kmsg &&
+ qos_ovr > prev->ikm_qos_override) {
+ prev->ikm_qos_override = qos_ovr;
+ prev = prev->ikm_prev;
+ }
+
+ /* did we adjust everything? */
+ return prev == kmsg;
+}
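+
+/*
+ * Example (illustrative): with per-message overrides of [2, 1, 3] from
+ * head to tail, enqueueing a kmsg with override 4 raises every earlier
+ * message, leaving [4, 4, 4, 4], and the routine returns TRUE because
+ * the head itself was adjusted.
+ */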
+
+/*
+ * Routine: ipc_kmsg_override_qos
+ * Purpose:
+ * Update the override for a given kmsg already
+ * enqueued, propagating qos override adjustments
+ * towards the head of the queue.
+ *
+ * Returns:
+ * whether the head of the queue had
+ * its override-qos adjusted because
+ * of this update.
+ */
+
+boolean_t
+ipc_kmsg_override_qos(
+ ipc_kmsg_queue_t queue,
+ ipc_kmsg_t kmsg,
+ mach_msg_qos_t qos_ovr)
+{
+ ipc_kmsg_t first = queue->ikmq_base;
+ ipc_kmsg_t cur = kmsg;
+
+ /* apply QoS overrides towards the head */
+ while (qos_ovr > cur->ikm_qos_override) {
+ cur->ikm_qos_override = qos_ovr;
+ if (cur == first) {
+ return TRUE;
+ }
+ cur = cur->ikm_prev;
+ }
+ return FALSE;
+}
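+
+/*
+ * Example (illustrative): for a queue with overrides [3, 2, 1] from head
+ * to tail, raising the tail kmsg to 4 walks toward the head raising each
+ * message, leaving [4, 4, 4], and returns TRUE; raising it only to 2
+ * stops at its predecessor ([3, 2, 2]) and returns FALSE.
+ */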
+
+/*
+ * Routine: ipc_kmsg_dequeue
+ * Purpose:
+ * Dequeue and return a kmsg.
+ */
+
+ipc_kmsg_t
+ipc_kmsg_dequeue(
+ ipc_kmsg_queue_t queue)
+{
+ ipc_kmsg_t first;
+
+ first = ipc_kmsg_queue_first(queue);
+
+ if (first != IKM_NULL) {
+ ipc_kmsg_rmqueue(queue, first);
+ }
+
+ return first;
+}
+
+/*
+ * Routine: ipc_kmsg_rmqueue
+ * Purpose:
+ * Pull a kmsg out of a queue.
+ */
+
+void
+ipc_kmsg_rmqueue(
+ ipc_kmsg_queue_t queue,
+ ipc_kmsg_t kmsg)
+{
+ ipc_kmsg_t next, prev;
+
+ assert(queue->ikmq_base != IKM_NULL);
+
+ next = kmsg->ikm_next;
+ prev = kmsg->ikm_prev;
+
+ if (next == kmsg) {
+ assert(prev == kmsg);
+ assert(queue->ikmq_base == kmsg);
+
+ queue->ikmq_base = IKM_NULL;
+ } else {
+ if (__improbable(next->ikm_prev != kmsg || prev->ikm_next != kmsg)) {
+ panic("ipc_kmsg_rmqueue: inconsistent prev/next pointers. "
+ "(prev->next: %p, next->prev: %p, kmsg: %p)",
+ prev->ikm_next, next->ikm_prev, kmsg);
+ }
+
+ if (queue->ikmq_base == kmsg) {
+ queue->ikmq_base = next;
+ }
+
+ next->ikm_prev = prev;
+ prev->ikm_next = next;
+ }
+ /* XXX Temporary debug logic */
+ assert((kmsg->ikm_next = IKM_BOGUS) == IKM_BOGUS);
+ assert((kmsg->ikm_prev = IKM_BOGUS) == IKM_BOGUS);
+}
+
+/*
+ * Routine: ipc_kmsg_queue_next
+ * Purpose:
+ * Return the kmsg following the given kmsg.
+ * (Or IKM_NULL if it is the last one in the queue.)
+ */
+
+ipc_kmsg_t
+ipc_kmsg_queue_next(
+ ipc_kmsg_queue_t queue,
+ ipc_kmsg_t kmsg)
+{
+ ipc_kmsg_t next;
+
+ assert(queue->ikmq_base != IKM_NULL);
+
+ next = kmsg->ikm_next;
+ if (queue->ikmq_base == next) {
+ next = IKM_NULL;
+ }
+
+ return next;
+}
+
+/*
+ * Routine: ipc_kmsg_destroy
+ * Purpose:
+ * Destroys a kernel message. Releases all rights,
+ * references, and memory held by the message.
+ * Frees the message.
+ * Conditions:
+ * No locks held.
+ */
+
+void
+ipc_kmsg_destroy(
+ ipc_kmsg_t kmsg)
+{
+ /*
+ * Destroying a message can cause more messages to be destroyed.
+ * Curtail recursion by putting messages on the deferred
+ * destruction queue. If this was the first message on the
+ * queue, this instance must process the full queue.
+ */
+ if (ipc_kmsg_delayed_destroy(kmsg)) {
+ ipc_kmsg_reap_delayed();
+ }
+}
+
+/*
+ * Routine: ipc_kmsg_delayed_destroy
+ * Purpose:
+ * Enqueues a kernel message for deferred destruction.
+ * Returns:
+ * Boolean indicator that the caller is responsible
+ * for reaping the deferred messages.
+ */
+
+boolean_t
+ipc_kmsg_delayed_destroy(
+ ipc_kmsg_t kmsg)
+{
+ ipc_kmsg_queue_t queue = &(current_thread()->ith_messages);
+ boolean_t first = ipc_kmsg_queue_empty(queue);
+
+ ipc_kmsg_enqueue(queue, kmsg);
+ return first;
+}
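+
+/*
+ * Batched-teardown sketch (illustrative): a caller destroying many
+ * messages can defer each one and reap once at the end, rather than
+ * reaping after every message:
+ *
+ *	boolean_t reap = FALSE;
+ *	while ((kmsg = ipc_kmsg_dequeue(queue)) != IKM_NULL)
+ *		reap |= ipc_kmsg_delayed_destroy(kmsg);
+ *	if (reap)
+ *		ipc_kmsg_reap_delayed();
+ */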
+
+/*
+ * Routine: ipc_kmsg_reap_delayed
+ * Purpose:
+ * Destroys messages from the per-thread
+ * deferred reaping queue.
+ * Conditions:
+ * No locks held.
+ */
+
+void
+ipc_kmsg_reap_delayed(void)
+{
+ ipc_kmsg_queue_t queue = &(current_thread()->ith_messages);
+ ipc_kmsg_t kmsg;
+
+ /*
+ * Must leave the kmsg in the queue while cleaning it, to ensure
+ * that nested calls do not recurse into here.
+ */
+ while ((kmsg = ipc_kmsg_queue_first(queue)) != IKM_NULL) {
+ ipc_kmsg_clean(kmsg);
+ ipc_kmsg_rmqueue(queue, kmsg);
+ ipc_kmsg_free(kmsg);
+ }
+}
+
+/*
+ * Routine: ipc_kmsg_clean_body
+ * Purpose:
+ * Cleans the body of a kernel message.
+ * Releases all rights, references, and memory.
+ *
+ * Conditions:
+ * No locks held.
+ */
+static unsigned int _ipc_kmsg_clean_invalid_desc = 0;
+void
+ipc_kmsg_clean_body(
+ __unused ipc_kmsg_t kmsg,
+ mach_msg_type_number_t number,
+ mach_msg_descriptor_t *saddr)
+{
+ mach_msg_type_number_t i;
+
+ if (number == 0) {
+ return;
+ }
+
+ for (i = 0; i < number; i++, saddr++) {
+ switch (saddr->type.type) {
+ case MACH_MSG_PORT_DESCRIPTOR: {
+ mach_msg_port_descriptor_t *dsc;
+
+ dsc = &saddr->port;
+
+ /*
+ * Destroy port rights carried in the message
+ */
+ if (!IP_VALID(dsc->name)) {
+ continue;
+ }
+ ipc_object_destroy(ip_to_object(dsc->name), dsc->disposition);
+ break;
+ }
+ case MACH_MSG_OOL_VOLATILE_DESCRIPTOR:
+ case MACH_MSG_OOL_DESCRIPTOR: {
+ mach_msg_ool_descriptor_t *dsc;
+
+ dsc = (mach_msg_ool_descriptor_t *)&saddr->out_of_line;
+
+ /*
+ * Destroy memory carried in the message
+ */
+ if (dsc->size == 0) {
+ assert(dsc->address == (void *) 0);
+ } else {
+ vm_map_copy_discard((vm_map_copy_t) dsc->address);
+ }
+ break;
+ }
+ case MACH_MSG_OOL_PORTS_DESCRIPTOR: {
+ ipc_object_t *objects;
+ mach_msg_type_number_t j;
+ mach_msg_ool_ports_descriptor_t *dsc;
+
+ dsc = (mach_msg_ool_ports_descriptor_t *)&saddr->ool_ports;
+ objects = (ipc_object_t *) dsc->address;
+
+ if (dsc->count == 0) {
+ break;
+ }
+
+ assert(objects != (ipc_object_t *) 0);
+
+ /* destroy port rights carried in the message */
+
+ for (j = 0; j < dsc->count; j++) {
+ ipc_object_t object = objects[j];
+
+ if (!IO_VALID(object)) {
+ continue;
+ }
+
+ ipc_object_destroy(object, dsc->disposition);
+ }
+
+ /* destroy memory carried in the message */
+
+ assert(dsc->count != 0);
+
+ kfree(dsc->address,
+ (vm_size_t) dsc->count * sizeof(mach_port_t));
+ break;
+ }
+ case MACH_MSG_GUARDED_PORT_DESCRIPTOR: {
+ mach_msg_guarded_port_descriptor_t *dsc = (typeof(dsc))&saddr->guarded_port;
+
+ /*
+ * Destroy port rights carried in the message
+ */
+ if (!IP_VALID(dsc->name)) {
+ continue;
+ }
+ ipc_object_destroy(ip_to_object(dsc->name), dsc->disposition);
+ break;
+ }
+ default: {
+ _ipc_kmsg_clean_invalid_desc++; /* don't understand this type of descriptor */
+ }
+ }
+ }
+}
+
+/*
+ * Routine: ipc_kmsg_clean_partial