+ /*
+ * check to see not only if ktracing is enabled, but if we will
+ * _actually_ emit the KMSG_INFO tracepoint. This saves us a
+ * significant amount of processing (and a port lock hold) in
+ * the non-tracing case.
+ */
+ if (__probable((kdebug_enable & KDEBUG_TRACE) == 0)) {
+ return;
+ }
+ if (!kdebug_debugid_enabled(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO))) {
+ return;
+ }
+
+ msg = kmsg->ikm_header;
+
+ dst_port = msg->msgh_remote_port;
+ if (!IPC_PORT_VALID(dst_port)) {
+ return;
+ }
+
+ /*
+ * Message properties / options
+ */
+ if ((option & (MACH_SEND_MSG | MACH_RCV_MSG)) == (MACH_SEND_MSG | MACH_RCV_MSG)) {
+ msg_flags |= KMSG_TRACE_FLAG_SNDRCV;
+ }
+
+ if (msg->msgh_id >= is_iokit_subsystem.start &&
+ msg->msgh_id < is_iokit_subsystem.end + 100) {
+ msg_flags |= KMSG_TRACE_FLAG_IOKIT;
+ }
+ /* magic XPC checkin message id (XPC_MESSAGE_ID_CHECKIN) from libxpc */
+ else if (msg->msgh_id == 0x77303074u /* w00t */) {
+ msg_flags |= KMSG_TRACE_FLAG_CHECKIN;
+ }
+
+ if (msg->msgh_bits & MACH_MSGH_BITS_RAISEIMP) {
+ msg_flags |= KMSG_TRACE_FLAG_RAISEIMP;
+ }
+
+ if (unsafe_convert_port_to_voucher(kmsg->ikm_voucher)) {
+ msg_flags |= KMSG_TRACE_FLAG_VOUCHER;
+ }
+
+ /*
+ * Sending task / port
+ */
+ send_task = current_task();
+ send_pid = task_pid(send_task);
+
+ if (send_pid != 0) {
+ if (task_is_daemon(send_task)) {
+ msg_flags |= KMSG_TRACE_FLAG_DAEMON_SRC;
+ } else if (task_is_app(send_task)) {
+ msg_flags |= KMSG_TRACE_FLAG_APP_SRC;
+ }
+ }
+
+ is_task_64bit = (send_task->map->max_offset > VM_MAX_ADDRESS);
+ if (is_task_64bit) {
+ msg_flags |= KMSG_TRACE_FLAG_SND64;
+ }
+
+ src_port = msg->msgh_local_port;
+ if (src_port) {
+ if (src_port->ip_messages.imq_qlimit != MACH_PORT_QLIMIT_DEFAULT) {
+ msg_flags |= KMSG_TRACE_FLAG_SRC_NDFLTQ;
+ }
+ switch (MACH_MSGH_BITS_LOCAL(msg->msgh_bits)) {
+ case MACH_MSG_TYPE_MOVE_SEND_ONCE:
+ msg_flags |= KMSG_TRACE_FLAG_SRC_SONCE;
+ break;
+ default:
+ break;
+ }
+ } else {
+ msg_flags |= KMSG_TRACE_FLAG_ONEWAY;
+ }
+
+
+ /*
+ * Destination task / port
+ */
+ ip_lock(dst_port);
+ if (!ip_active(dst_port)) {
+ /* dst port is being torn down */
+ dst_pid = (uint32_t)0xfffffff0;
+ } else if (dst_port->ip_tempowner) {
+ msg_flags |= KMSG_TRACE_FLAG_DTMPOWNER;
+ if (IIT_NULL != dst_port->ip_imp_task) {
+ dst_pid = task_pid(dst_port->ip_imp_task->iit_task);
+ } else {
+ dst_pid = (uint32_t)0xfffffff1;
+ }
+ } else if (dst_port->ip_receiver_name == MACH_PORT_NULL) {
+ /* dst_port is otherwise in-transit */
+ dst_pid = (uint32_t)0xfffffff2;
+ } else {
+ if (dst_port->ip_receiver == ipc_space_kernel) {
+ dst_pid = 0;
+ } else {
+ ipc_space_t dst_space;
+ dst_space = dst_port->ip_receiver;
+ if (dst_space && is_active(dst_space)) {
+ dst_pid = task_pid(dst_space->is_task);
+ if (task_is_daemon(dst_space->is_task)) {
+ msg_flags |= KMSG_TRACE_FLAG_DAEMON_DST;
+ } else if (task_is_app(dst_space->is_task)) {
+ msg_flags |= KMSG_TRACE_FLAG_APP_DST;
+ }
+ } else {
+ /* receiving task is being torn down */
+ dst_pid = (uint32_t)0xfffffff3;
+ }
+ }
+ }
+
+ if (dst_port->ip_messages.imq_qlimit != MACH_PORT_QLIMIT_DEFAULT) {
+ msg_flags |= KMSG_TRACE_FLAG_DST_NDFLTQ;
+ }
+ if (imq_full(&dst_port->ip_messages)) {
+ msg_flags |= KMSG_TRACE_FLAG_DSTQFULL;
+ }
+
+ kotype = ip_kotype(dst_port);
+
+ ip_unlock(dst_port);
+
+ switch (kotype) {
+ case IKOT_SEMAPHORE:
+ msg_flags |= KMSG_TRACE_FLAG_SEMA;
+ break;
+ case IKOT_TIMER:
+ case IKOT_CLOCK:
+ msg_flags |= KMSG_TRACE_FLAG_TIMER;
+ break;
+ case IKOT_MASTER_DEVICE:
+ case IKOT_IOKIT_CONNECT:
+ case IKOT_IOKIT_OBJECT:
+ case IKOT_IOKIT_IDENT:
+ case IKOT_UEXT_OBJECT:
+ msg_flags |= KMSG_TRACE_FLAG_IOKIT;
+ break;
+ default:
+ break;
+ }
+
+ switch (MACH_MSGH_BITS_REMOTE(msg->msgh_bits)) {
+ case MACH_MSG_TYPE_PORT_SEND_ONCE:
+ msg_flags |= KMSG_TRACE_FLAG_DST_SONCE;
+ break;
+ default:
+ break;
+ }
+
+
+ /*
+ * Message size / content
+ */
+ msg_size = msg->msgh_size - sizeof(mach_msg_header_t);
+
+ if (msg->msgh_bits & MACH_MSGH_BITS_COMPLEX) {
+ mach_msg_body_t *msg_body;
+ mach_msg_descriptor_t *kern_dsc;
+ int dsc_count;
+
+ msg_flags |= KMSG_TRACE_FLAG_COMPLEX;
+
+ msg_body = (mach_msg_body_t *)(kmsg->ikm_header + 1);
+ dsc_count = (int)msg_body->msgh_descriptor_count;
+ kern_dsc = (mach_msg_descriptor_t *)(msg_body + 1);
+
+ /* this is gross: see ipc_kmsg_copyin_body()... */
+ if (!is_task_64bit) {
+ msg_size -= (dsc_count * 12);
+ }
+
+ for (int i = 0; i < dsc_count; i++) {
+ switch (kern_dsc[i].type.type) {
+ case MACH_MSG_PORT_DESCRIPTOR:
+ num_ports++;
+ if (is_task_64bit) {
+ msg_size -= 12;
+ }
+ break;
+ case MACH_MSG_OOL_VOLATILE_DESCRIPTOR:
+ case MACH_MSG_OOL_DESCRIPTOR: {
+ mach_msg_ool_descriptor_t *dsc;
+ dsc = (mach_msg_ool_descriptor_t *)&kern_dsc[i];
+ msg_flags |= KMSG_TRACE_FLAG_OOLMEM;
+ msg_size += dsc->size;
+ if ((dsc->size >= MSG_OOL_SIZE_SMALL) &&
+ (dsc->copy == MACH_MSG_PHYSICAL_COPY) &&
+ !dsc->deallocate) {
+ msg_flags |= KMSG_TRACE_FLAG_PCPY;
+ } else if (dsc->size <= MSG_OOL_SIZE_SMALL) {
+ msg_flags |= KMSG_TRACE_FLAG_PCPY;
+ } else {
+ msg_flags |= KMSG_TRACE_FLAG_VCPY;
+ }
+ if (is_task_64bit) {
+ msg_size -= 16;
+ }
+ } break;
+ case MACH_MSG_OOL_PORTS_DESCRIPTOR: {
+ mach_msg_ool_ports_descriptor_t *dsc;
+ dsc = (mach_msg_ool_ports_descriptor_t *)&kern_dsc[i];
+ num_ports += dsc->count;
+ if (is_task_64bit) {
+ msg_size -= 16;
+ }
+ } break;
+ case MACH_MSG_GUARDED_PORT_DESCRIPTOR:
+ num_ports++;
+ msg_flags |= KMSG_TRACE_FLAG_GUARDED_DESC;
+ if (is_task_64bit) {
+ msg_size -= 16;
+ }
+ default:
+ break;
+ }
+ }
+ }
+
+ /*
+ * Trailer contents
+ */
+ trailer = (mach_msg_trailer_t *)((vm_offset_t)msg +
+ round_msg((vm_offset_t)msg->msgh_size));
+ if (trailer->msgh_trailer_size <= sizeof(mach_msg_security_trailer_t)) {
+ extern const security_token_t KERNEL_SECURITY_TOKEN;
+ mach_msg_security_trailer_t *strailer;
+ strailer = (mach_msg_security_trailer_t *)trailer;
+ /*
+ * verify the sender PID: replies from the kernel often look
+ * like self-talk because the sending port is not reset.
+ */
+ if (memcmp(&strailer->msgh_sender,
+ &KERNEL_SECURITY_TOKEN,
+ sizeof(KERNEL_SECURITY_TOKEN)) == 0) {
+ send_pid = 0;
+ msg_flags &= ~(KMSG_TRACE_FLAG_APP_SRC | KMSG_TRACE_FLAG_DAEMON_SRC);
+ }
+ }
+
+ KDBG(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_INFO) | DBG_FUNC_END,
+ (uintptr_t)send_pid,
+ (uintptr_t)dst_pid,
+ (uintptr_t)msg_size,
+ (uintptr_t)(
+ ((msg_flags & KMSG_TRACE_FLAGS_MASK) << KMSG_TRACE_FLAGS_SHIFT) |
+ ((num_ports & KMSG_TRACE_PORTS_MASK) << KMSG_TRACE_PORTS_SHIFT)
+ )
+ );
+}
+#endif
+
+/* zone for cached ipc_kmsg_t structures */
+zone_t ipc_kmsg_zone;
+
+/*
+ * Forward declarations
+ */
+
+void ipc_kmsg_clean(
+ ipc_kmsg_t kmsg);
+
+void ipc_kmsg_clean_body(
+ ipc_kmsg_t kmsg,
+ mach_msg_type_number_t number,
+ mach_msg_descriptor_t *desc);
+
+void ipc_kmsg_clean_partial(
+ ipc_kmsg_t kmsg,
+ mach_msg_type_number_t number,
+ mach_msg_descriptor_t *desc,
+ vm_offset_t paddr,
+ vm_size_t length);
+
+mach_msg_return_t ipc_kmsg_copyin_body(
+ ipc_kmsg_t kmsg,
+ ipc_space_t space,
+ vm_map_t map,
+ mach_msg_option_t *optionp);
+
+
+extern int enforce_strict_reply;
+
+static void
+ipc_kmsg_link_reply_context_locked(
+ ipc_port_t reply_port,
+ ipc_port_t voucher_port);
+
+static kern_return_t
+ipc_kmsg_validate_reply_port_locked(
+ ipc_port_t reply_port,
+ mach_msg_option_t options);
+
+static mach_msg_return_t
+ipc_kmsg_validate_reply_context_locked(
+ mach_msg_option_t option,
+ ipc_port_t dest_port,
+ ipc_voucher_t voucher,
+ mach_port_name_t voucher_name);
+
+/* we can't include the BSD <sys/persona.h> header here... */
+#ifndef PERSONA_ID_NONE
+#define PERSONA_ID_NONE ((uint32_t)-1)
+#endif
+
+/*
+ * We keep a per-processor cache of kernel message buffers.
+ * The cache saves the overhead/locking of using kalloc/kfree.
+ * The per-processor cache seems to miss less than a per-thread cache,
+ * and it also uses less memory. Access to the cache doesn't
+ * require locking.
+ */
+
+/*
+ * Routine: ipc_kmsg_alloc
+ * Purpose:
+ * Allocate a kernel message structure. If we can get one from
+ * the cache, that is best. Otherwise, allocate a new one.
+ * Conditions:
+ * Nothing locked.
+ */
+ipc_kmsg_t
+ipc_kmsg_alloc(
+ mach_msg_size_t msg_and_trailer_size)
+{
+ mach_msg_size_t max_expanded_size;
+ ipc_kmsg_t kmsg;
+
+ /*
+ * LP64support -
+ * Pad the allocation in case we need to expand the
+ * message descriptors for user spaces with pointers larger than
+ * the kernel's own, or vice versa. We don't know how many descriptors
+ * there are yet, so just assume the whole body could be
+ * descriptors (if there could be any at all).
+ *
+ * The expansion space is left in front of the header,
+ * because it is easier to pull the header and descriptors
+ * forward as we process them than it is to push all the
+ * data backwards.
+ */
+ mach_msg_size_t size = msg_and_trailer_size - MAX_TRAILER_SIZE;
+
+ /* compare against implementation upper limit for the body */
+ if (size > ipc_kmsg_max_body_space) {
+ return IKM_NULL;
+ }
+
+ if (size > sizeof(mach_msg_base_t)) {
+ mach_msg_size_t max_desc = (mach_msg_size_t)(((size - sizeof(mach_msg_base_t)) /
+ sizeof(mach_msg_ool_descriptor32_t)) *
+ DESC_SIZE_ADJUSTMENT);
+
+ /* make sure expansion won't cause wrap */
+ if (msg_and_trailer_size > MACH_MSG_SIZE_MAX - max_desc) {
+ return IKM_NULL;
+ }
+
+ max_expanded_size = msg_and_trailer_size + max_desc;
+ } else {
+ max_expanded_size = msg_and_trailer_size;
+ }
+
+ if (max_expanded_size < IKM_SAVED_MSG_SIZE) {
+ max_expanded_size = IKM_SAVED_MSG_SIZE; /* round up for ikm_cache */
+ }
+ if (max_expanded_size == IKM_SAVED_MSG_SIZE) {
+ kmsg = (ipc_kmsg_t)zalloc(ipc_kmsg_zone);
+ } else {
+ kmsg = (ipc_kmsg_t)kalloc(ikm_plus_overhead(max_expanded_size));
+ }
+
+ if (kmsg != IKM_NULL) {
+ ikm_init(kmsg, max_expanded_size);
+ ikm_set_header(kmsg, msg_and_trailer_size);
+ }
+
+ return kmsg;
+}
+
+/*
+ * Routine: ipc_kmsg_free
+ * Purpose:
+ * Free a kernel message buffer. If the kms is preallocated
+ * to a port, just "put it back (marked unused)." We have to
+ * do this with the port locked. The port may have its hold
+ * on our message released. In that case, we have to just
+ * revert the message to a traditional one and free it normally.
+ * Conditions:
+ * Nothing locked.
+ */
+
+void
+ipc_kmsg_free(
+ ipc_kmsg_t kmsg)
+{
+ mach_msg_size_t size = kmsg->ikm_size;
+ ipc_port_t port;
+
+ assert(!IP_VALID(kmsg->ikm_voucher));
+
+ KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_KMSG_FREE) | DBG_FUNC_NONE,
+ VM_KERNEL_ADDRPERM((uintptr_t)kmsg),
+ 0, 0, 0, 0);
+
+ /*
+ * Check to see if the message is bound to the port. If so,
+ * mark it not in use. If the port isn't already dead, then
+ * leave the message associated with it. Otherwise, free it.
+ */
+ port = ikm_prealloc_inuse_port(kmsg);
+ if (port != IP_NULL) {
+ ip_lock(port);
+ ikm_prealloc_clear_inuse(kmsg, port);
+ if (ip_active(port) && (port->ip_premsg == kmsg)) {
+ assert(IP_PREALLOC(port));
+ ip_unlock(port);
+ ip_release(port);
+ return;
+ }
+ ip_unlock(port);
+ ip_release(port); /* May be last reference */
+ }
+
+ if (kmsg->ikm_size == IKM_SAVED_MSG_SIZE) {
+ zfree(ipc_kmsg_zone, kmsg);
+ return;
+ }
+ kfree(kmsg, ikm_plus_overhead(size));
+}
+
+
+/*
+ * Routine: ipc_kmsg_enqueue
+ * Purpose:
+ * Enqueue a kmsg.
+ */
+
+void
+ipc_kmsg_enqueue(
+ ipc_kmsg_queue_t queue,
+ ipc_kmsg_t kmsg)
+{
+ ipc_kmsg_t first = queue->ikmq_base;
+ ipc_kmsg_t last;
+
+ if (first == IKM_NULL) {
+ queue->ikmq_base = kmsg;
+ kmsg->ikm_next = kmsg;
+ kmsg->ikm_prev = kmsg;
+ } else {
+ last = first->ikm_prev;
+ kmsg->ikm_next = first;
+ kmsg->ikm_prev = last;
+ first->ikm_prev = kmsg;
+ last->ikm_next = kmsg;
+ }
+}
+
+/*
+ * Routine: ipc_kmsg_enqueue_qos
+ * Purpose:
+ * Enqueue a kmsg, propagating qos
+ * overrides towards the head of the queue.
+ *
+ * Returns:
+ * whether the head of the queue had
+ * it's override-qos adjusted because
+ * of this insertion.
+ */
+
+boolean_t
+ipc_kmsg_enqueue_qos(
+ ipc_kmsg_queue_t queue,
+ ipc_kmsg_t kmsg)
+{
+ ipc_kmsg_t first = queue->ikmq_base;
+ ipc_kmsg_t prev;
+ mach_msg_priority_t override;
+
+ if (first == IKM_NULL) {
+ /* insert a first message */
+ queue->ikmq_base = kmsg;
+ kmsg->ikm_next = kmsg;
+ kmsg->ikm_prev = kmsg;
+ return TRUE;
+ }
+
+ /* insert at the tail */
+ prev = first->ikm_prev;
+ kmsg->ikm_next = first;
+ kmsg->ikm_prev = prev;
+ first->ikm_prev = kmsg;
+ prev->ikm_next = kmsg;
+
+ /* apply QoS overrides towards the head */
+ override = kmsg->ikm_qos_override;
+ while (prev != kmsg &&
+ override > prev->ikm_qos_override) {
+ prev->ikm_qos_override = override;
+ prev = prev->ikm_prev;
+ }
+
+ /* did we adjust everything? */
+ return prev == kmsg;
+}
+
+/*
+ * Routine: ipc_kmsg_override_qos
+ * Purpose:
+ * Update the override for a given kmsg already
+ * enqueued, propagating qos override adjustments
+ * towards the head of the queue.
+ *
+ * Returns:
+ * whether the head of the queue had
+ * it's override-qos adjusted because
+ * of this insertion.
+ */
+
+boolean_t
+ipc_kmsg_override_qos(
+ ipc_kmsg_queue_t queue,
+ ipc_kmsg_t kmsg,
+ mach_msg_priority_t override)
+{
+ ipc_kmsg_t first = queue->ikmq_base;
+ ipc_kmsg_t cur = kmsg;
+
+ /* apply QoS overrides towards the head */
+ while (override > cur->ikm_qos_override) {
+ cur->ikm_qos_override = override;
+ if (cur == first) {
+ return TRUE;
+ }
+ cur = cur->ikm_prev;
+ }
+ return FALSE;
+}
+
+/*
+ * Routine: ipc_kmsg_dequeue
+ * Purpose:
+ * Dequeue and return a kmsg.
+ */
+
+ipc_kmsg_t
+ipc_kmsg_dequeue(
+ ipc_kmsg_queue_t queue)
+{
+ ipc_kmsg_t first;
+
+ first = ipc_kmsg_queue_first(queue);
+
+ if (first != IKM_NULL) {
+ ipc_kmsg_rmqueue(queue, first);
+ }
+
+ return first;
+}
+
+/*
+ * Routine: ipc_kmsg_rmqueue
+ * Purpose:
+ * Pull a kmsg out of a queue.
+ */
+
+void
+ipc_kmsg_rmqueue(
+ ipc_kmsg_queue_t queue,
+ ipc_kmsg_t kmsg)
+{
+ ipc_kmsg_t next, prev;
+
+ assert(queue->ikmq_base != IKM_NULL);
+
+ next = kmsg->ikm_next;
+ prev = kmsg->ikm_prev;
+
+ if (next == kmsg) {
+ assert(prev == kmsg);
+ assert(queue->ikmq_base == kmsg);
+
+ queue->ikmq_base = IKM_NULL;
+ } else {
+ if (__improbable(next->ikm_prev != kmsg || prev->ikm_next != kmsg)) {
+ panic("ipc_kmsg_rmqueue: inconsistent prev/next pointers. "
+ "(prev->next: %p, next->prev: %p, kmsg: %p)",
+ prev->ikm_next, next->ikm_prev, kmsg);
+ }
+
+ if (queue->ikmq_base == kmsg) {
+ queue->ikmq_base = next;
+ }
+
+ next->ikm_prev = prev;
+ prev->ikm_next = next;
+ }
+ /* XXX Temporary debug logic */
+ assert((kmsg->ikm_next = IKM_BOGUS) == IKM_BOGUS);
+ assert((kmsg->ikm_prev = IKM_BOGUS) == IKM_BOGUS);
+}
+
+/*
+ * Routine: ipc_kmsg_queue_next
+ * Purpose:
+ * Return the kmsg following the given kmsg.
+ * (Or IKM_NULL if it is the last one in the queue.)
+ */
+
+ipc_kmsg_t
+ipc_kmsg_queue_next(
+ ipc_kmsg_queue_t queue,
+ ipc_kmsg_t kmsg)
+{
+ ipc_kmsg_t next;
+
+ assert(queue->ikmq_base != IKM_NULL);
+
+ next = kmsg->ikm_next;
+ if (queue->ikmq_base == next) {
+ next = IKM_NULL;
+ }
+
+ return next;
+}
+
+/*
+ * Routine: ipc_kmsg_destroy
+ * Purpose:
+ * Destroys a kernel message. Releases all rights,
+ * references, and memory held by the message.
+ * Frees the message.
+ * Conditions:
+ * No locks held.
+ */
+
+void
+ipc_kmsg_destroy(
+ ipc_kmsg_t kmsg)
+{
+ /*
+ * Destroying a message can cause more messages to be destroyed.
+ * Curtail recursion by putting messages on the deferred
+ * destruction queue. If this was the first message on the
+ * queue, this instance must process the full queue.
+ */
+ if (ipc_kmsg_delayed_destroy(kmsg)) {
+ ipc_kmsg_reap_delayed();
+ }
+}
+
+/*
+ * Routine: ipc_kmsg_delayed_destroy
+ * Purpose:
+ * Enqueues a kernel message for deferred destruction.
+ * Returns:
+ * Boolean indicator that the caller is responsible to reap
+ * deferred messages.
+ */
+
+boolean_t
+ipc_kmsg_delayed_destroy(
+ ipc_kmsg_t kmsg)
+{
+ ipc_kmsg_queue_t queue = &(current_thread()->ith_messages);
+ boolean_t first = ipc_kmsg_queue_empty(queue);
+
+ ipc_kmsg_enqueue(queue, kmsg);
+ return first;
+}
+
+/*
+ * Routine: ipc_kmsg_destroy_queue
+ * Purpose:
+ * Destroys messages from the per-thread
+ * deferred reaping queue.
+ * Conditions:
+ * No locks held.
+ */
+
+void
+ipc_kmsg_reap_delayed(void)
+{
+ ipc_kmsg_queue_t queue = &(current_thread()->ith_messages);
+ ipc_kmsg_t kmsg;
+
+ /*
+ * must leave kmsg in queue while cleaning it to assure
+ * no nested calls recurse into here.
+ */
+ while ((kmsg = ipc_kmsg_queue_first(queue)) != IKM_NULL) {
+ ipc_kmsg_clean(kmsg);
+ ipc_kmsg_rmqueue(queue, kmsg);
+ ipc_kmsg_free(kmsg);
+ }
+}