]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/kern/affinity.c
xnu-1228.tar.gz
[apple/xnu.git] / osfmk / kern / affinity.c
diff --git a/osfmk/kern/affinity.c b/osfmk/kern/affinity.c
new file mode 100644 (file)
index 0000000..1b319c7
--- /dev/null
@@ -0,0 +1,552 @@
+/*
+ * Copyright (c) 2007 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <kern/affinity.h>
+#include <kern/task.h>
+#include <kern/kalloc.h>
+#include <machine/cpu_affinity.h>
+
+/*
+ * Affinity involves 2 objects:
+ * - affinity namespace:
+ *     shared by a task family, this controls affinity tag lookup and
+ *     allocation; it anchors all affinity sets in one namespace
+ * - affinity set:
+ *     anchors all threads with membership of this affinity set
+ *     and which share an affinity tag in the owning namespace.
+ * 
+ * Locking:
+ * - The task lock protects the creation of an affinity namespace.
+ * - The affinity namespace mutex protects the inheritance of a namespace
+ *   and its thread membership. This includes its destruction when the task
+ *   reference count goes to zero.
+ * - The thread mutex protects a thread's affinity set membership, but in
+ *   addition, the thread_lock is taken to write thread->affinity_set since this
+ *   field (representng the active affinity set) is read by the scheduler.
+ * 
+ * The lock ordering is: task lock, thread mutex, namespace mutex, thread lock.
+ */
+
+#if AFFINITY_DEBUG
+#define DBG(x...)      kprintf("DBG: " x)
+#else
+#define DBG(x...)
+#endif
+
+struct affinity_space {
+       mutex_t         aspc_lock;
+       uint32_t        aspc_task_count;
+       queue_head_t    aspc_affinities;
+};
+typedef struct affinity_space *affinity_space_t;
+
+static affinity_space_t affinity_space_alloc(void);
+static void affinity_space_free(affinity_space_t aspc);
+static affinity_set_t affinity_set_alloc(void);
+static void affinity_set_free(affinity_set_t aset);
+static affinity_set_t affinity_set_find(affinity_space_t aspc, uint32_t tag);
+static void affinity_set_place(affinity_space_t aspc, affinity_set_t aset);
+static void affinity_set_add(affinity_set_t aset, thread_t thread);
+static affinity_set_t affinity_set_remove(affinity_set_t aset, thread_t thread);
+
+/*
+ * The following globals may be modified by the sysctls
+ *   kern.affinity_sets_enabled        - disables hinting if cleared
+ *   kern.affinity_sets_mapping        - controls cache distribution policy
+ * See bsd/kern_sysctl.c
+ */
+boolean_t      affinity_sets_enabled = TRUE;
+int            affinity_sets_mapping = 1;
+
+boolean_t
+thread_affinity_is_supported(void)
+{
+       return (ml_get_max_affinity_sets() != 0);
+}
+
+
+/*
+ * thread_affinity_get() 
+ * Return the affinity tag for a thread. 
+ * Called with the thread mutex held.
+ */
+uint32_t
+thread_affinity_get(thread_t thread)
+{
+       uint32_t tag;
+
+       if (thread->affinity_set != NULL)
+               tag = thread->affinity_set->aset_tag;
+       else
+               tag = THREAD_AFFINITY_TAG_NULL;
+
+       return tag;
+}
+
+
+/*
+ * thread_affinity_set() 
+ * Place a thread in an affinity set identified by a tag.
+ * Called with thread referenced but not locked.
+ */
+kern_return_t
+thread_affinity_set(thread_t thread, uint32_t tag)
+{
+       affinity_set_t          aset;
+       affinity_set_t          empty_aset = NULL;
+       affinity_space_t        aspc;
+       affinity_space_t        new_aspc = NULL;
+
+       DBG("thread_affinity_set(%p,%u)\n", thread, tag);
+
+       task_lock(thread->task);
+       aspc = thread->task->affinity_space;
+       if (aspc == NULL) {
+               task_unlock(thread->task);
+               new_aspc = affinity_space_alloc();
+               if (new_aspc == NULL)
+                       return KERN_RESOURCE_SHORTAGE;
+               task_lock(thread->task);
+               if (thread->task->affinity_space == NULL) {
+                       thread->task->affinity_space = new_aspc;
+                       new_aspc = NULL;
+               }
+               aspc = thread->task->affinity_space;
+       }
+       task_unlock(thread->task);
+       if (new_aspc)
+               affinity_space_free(new_aspc);
+
+       thread_mtx_lock(thread);
+       if (!thread->active) {
+               /* Beaten to lock and the thread is dead */
+               thread_mtx_unlock(thread);
+               return KERN_TERMINATED;
+       }
+
+       mutex_lock(&aspc->aspc_lock);
+       aset = thread->affinity_set;
+       if (aset != NULL) {
+               /*
+                * Remove thread from current affinity set
+                */
+               DBG("thread_affinity_set(%p,%u) removing from aset %p\n",
+                       thread, tag, aset);
+               empty_aset = affinity_set_remove(aset, thread);
+       }
+
+       if (tag != THREAD_AFFINITY_TAG_NULL) {
+               aset = affinity_set_find(aspc, tag);
+               if (aset != NULL) {
+                       /*
+                        * Add thread to existing affinity set
+                        */
+                       DBG("thread_affinity_set(%p,%u) found aset %p\n",
+                               thread, tag, aset);
+               } else {
+                       /*
+                        * Use the new affinity set, add this thread
+                        * and place it in a suitable processor set.
+                        */
+                       if (empty_aset != NULL) {
+                               aset = empty_aset;
+                               empty_aset = NULL;
+                       } else {
+                               aset = affinity_set_alloc();
+                               if (aset == NULL) {
+                                       mutex_unlock(&aspc->aspc_lock);
+                                       thread_mtx_unlock(thread);
+                                       return KERN_RESOURCE_SHORTAGE;
+                               }
+                       }
+                       DBG("thread_affinity_set(%p,%u) (re-)using aset %p\n",
+                               thread, tag, aset);
+                       aset->aset_tag = tag;
+                       affinity_set_place(aspc, aset);
+               }
+               affinity_set_add(aset, thread);
+       }
+
+       mutex_unlock(&aspc->aspc_lock);
+       thread_mtx_unlock(thread);
+
+       /*
+        * If we wound up not using an empty aset we created,
+        * free it here.
+        */
+       if (empty_aset != NULL)
+               affinity_set_free(empty_aset);
+
+       if (thread == current_thread())
+               thread_block(THREAD_CONTINUE_NULL);
+
+       return KERN_SUCCESS;
+}
+
+/*
+ * task_affinity_create()
+ * Called from task create.
+ */
+void
+task_affinity_create(task_t parent_task, task_t child_task)
+{
+       affinity_space_t        aspc = parent_task->affinity_space;
+
+       DBG("task_affinity_create(%p,%p)\n", parent_task, child_task);
+
+       assert(aspc);
+
+       /*
+        * Bump the task reference count on the shared namespace and
+        * give it to the child.
+        */
+       mutex_lock(&aspc->aspc_lock);
+       aspc->aspc_task_count++;
+       child_task->affinity_space = aspc;
+       mutex_unlock(&aspc->aspc_lock);
+}
+
+/*
+ * task_affinity_deallocate()
+ * Called from task_deallocate() when there's a namespace to dereference.
+ */
+void
+task_affinity_deallocate(task_t        task)
+{
+       affinity_space_t        aspc = task->affinity_space;
+
+       DBG("task_affinity_deallocate(%p) aspc %p task_count %d\n",
+               task, aspc, aspc->aspc_task_count);
+
+       mutex_lock(&aspc->aspc_lock);
+       if (--(aspc->aspc_task_count) == 0) {
+               assert(queue_empty(&aspc->aspc_affinities));
+               mutex_unlock(&aspc->aspc_lock);
+               affinity_space_free(aspc);
+       } else {
+               mutex_unlock(&aspc->aspc_lock);
+       }
+}
+
+/*
+ * task_affinity_info()
+ * Return affinity tag info (number, min, max) for the task.
+ */
+kern_return_t
+task_affinity_info(
+       task_t                  task,
+       task_info_t             task_info_out,
+       mach_msg_type_number_t  *task_info_count)
+{
+       affinity_set_t                  aset;
+       affinity_space_t                aspc;
+       task_affinity_tag_info_t        info;
+
+       *task_info_count = TASK_AFFINITY_TAG_INFO_COUNT;
+       info = (task_affinity_tag_info_t) task_info_out;
+       info->set_count = 0;
+       info->task_count = 0;
+       info->min = THREAD_AFFINITY_TAG_NULL;
+       info->max = THREAD_AFFINITY_TAG_NULL;
+
+       task_lock(task);
+       aspc = task->affinity_space;
+       if (aspc) {
+               mutex_lock(&aspc->aspc_lock);
+               queue_iterate(&aspc->aspc_affinities,
+                                aset, affinity_set_t, aset_affinities) {       
+                       info->set_count++;
+                       if (info->min == THREAD_AFFINITY_TAG_NULL ||
+                           aset->aset_tag < (uint32_t) info->min)
+                               info->min = aset->aset_tag;
+                       if (info->max == THREAD_AFFINITY_TAG_NULL ||
+                           aset->aset_tag > (uint32_t) info->max)
+                               info->max = aset->aset_tag;
+               }
+               info->task_count = aspc->aspc_task_count;
+               mutex_unlock(&aspc->aspc_lock);
+       }
+       task_unlock(task);
+       return KERN_SUCCESS;
+}
+
+/*
+ * Called from thread_dup() during fork() with child's mutex held.
+ * Set the child into the parent's affinity set.
+ * Note the affinity space is shared.
+ */
+void
+thread_affinity_dup(thread_t parent, thread_t child)
+{
+       affinity_set_t                  aset;
+       affinity_space_t                aspc;
+
+       thread_mtx_lock(parent);
+       aset = parent->affinity_set;
+       DBG("thread_affinity_dup(%p,%p) aset %p\n", parent, child, aset);
+       if (aset == NULL) {
+               thread_mtx_unlock(parent);
+               return;
+       }
+
+       aspc = aset->aset_space;
+       assert(aspc == parent->task->affinity_space);
+       assert(aspc == child->task->affinity_space);
+
+       mutex_lock(&aspc->aspc_lock);
+       affinity_set_add(aset, child);
+       mutex_unlock(&aspc->aspc_lock);
+
+       thread_mtx_unlock(parent);
+}
+
+/*
+ * thread_affinity_terminate() 
+ * Remove thread from any affinity set.
+ * Called with the thread mutex locked.
+ */
+void
+thread_affinity_terminate(thread_t thread)
+{
+       affinity_set_t          aset = thread->affinity_set;
+       affinity_space_t        aspc;
+
+       DBG("thread_affinity_terminate(%p)\n", thread);
+
+       aspc = aset->aset_space;
+       mutex_lock(&aspc->aspc_lock);
+       if (affinity_set_remove(aset, thread)) {
+               affinity_set_free(aset);
+       }
+       mutex_unlock(&aspc->aspc_lock);
+}
+
+/*
+ * Create an empty affinity namespace data structure.
+ */
+static affinity_space_t
+affinity_space_alloc(void) 
+{
+       affinity_space_t        aspc;
+
+       aspc = (affinity_space_t) kalloc(sizeof(struct affinity_space));
+       if (aspc == NULL)
+               return NULL;
+
+       mutex_init(&aspc->aspc_lock, 0);
+       queue_init(&aspc->aspc_affinities);
+       aspc->aspc_task_count = 1;
+
+       DBG("affinity_space_create() returns %p\n", aspc);
+       return aspc;
+}
+
+/*
+ * Destroy the given empty affinity namespace data structure.
+ */
+static void
+affinity_space_free(affinity_space_t aspc)
+{
+       assert(queue_empty(&aspc->aspc_affinities));
+
+       DBG("affinity_space_free(%p)\n", aspc);
+       kfree(aspc, sizeof(struct affinity_space));
+}
+
+
+/*
+ * Create an empty affinity set data structure
+ * entering it into a list anchored by the owning task.
+ */
+static affinity_set_t
+affinity_set_alloc(void) 
+{
+       affinity_set_t  aset;
+
+       aset = (affinity_set_t) kalloc(sizeof(struct affinity_set));
+       if (aset == NULL)
+               return NULL;
+
+       aset->aset_thread_count = 0;
+       queue_init(&aset->aset_affinities);
+       queue_init(&aset->aset_threads);
+       aset->aset_num = 0;
+       aset->aset_pset = PROCESSOR_SET_NULL;
+       aset->aset_space = NULL;
+
+       DBG("affinity_set_create() returns %p\n", aset);
+       return aset;
+}
+
+/*
+ * Destroy the given empty affinity set data structure
+ * after removing it from the parent task.
+ */
+static void
+affinity_set_free(affinity_set_t aset)
+{
+       assert(queue_empty(&aset->aset_threads));
+
+       DBG("affinity_set_free(%p)\n", aset);
+       kfree(aset, sizeof(struct affinity_set));
+}
+
+/*
+ * Add a thread to an affinity set.
+ * The caller must have the thread mutex and space locked.
+ */
+static void
+affinity_set_add(affinity_set_t aset, thread_t thread)
+{
+       spl_t   s;
+
+       DBG("affinity_set_add(%p,%p)\n", aset, thread);
+       queue_enter(&aset->aset_threads,
+               thread, thread_t, affinity_threads);
+       aset->aset_thread_count++;
+       s = splsched();
+       thread_lock(thread);
+       thread->affinity_set = affinity_sets_enabled ? aset : NULL;
+       thread_unlock(thread);
+       splx(s);
+}
+
+/*
+ * Remove a thread from an affinity set returning the set if now empty.
+ * The caller must have the thread mutex and space locked.
+ */
+static affinity_set_t
+affinity_set_remove(affinity_set_t aset, thread_t thread)
+{
+       spl_t   s;
+
+       s = splsched();
+       thread_lock(thread);
+       thread->affinity_set = NULL;
+       thread_unlock(thread);
+       splx(s);
+
+       aset->aset_thread_count--;
+       queue_remove(&aset->aset_threads,
+               thread, thread_t, affinity_threads);
+       if (queue_empty(&aset->aset_threads)) {
+               queue_remove(&aset->aset_space->aspc_affinities,
+                               aset, affinity_set_t, aset_affinities);
+               assert(aset->aset_thread_count == 0);
+               aset->aset_tag = THREAD_AFFINITY_TAG_NULL;
+               aset->aset_num = 0;
+               aset->aset_pset = PROCESSOR_SET_NULL;
+               aset->aset_space = NULL;
+               DBG("affinity_set_remove(%p,%p) set now empty\n", aset, thread);
+               return aset;
+       } else {
+               DBG("affinity_set_remove(%p,%p)\n", aset, thread);
+               return NULL;
+       }
+}
+
+/*
+ * Find an affinity set in the parent task with the given affinity tag.
+ * The caller must have the space locked.
+ */
+static affinity_set_t
+affinity_set_find(affinity_space_t space, uint32_t tag)
+{
+       affinity_set_t  aset;
+
+       queue_iterate(&space->aspc_affinities,
+                        aset, affinity_set_t, aset_affinities) {       
+               if (aset->aset_tag == tag) {
+                       DBG("affinity_set_find(%p,%u) finds %p\n",
+                           space, tag, aset);
+                       return aset;
+               }
+       }
+       DBG("affinity_set_find(%p,%u) not found\n", space, tag);
+       return NULL;
+}
+
+/*
+ * affinity_set_place() assigns an affinity set to a suitable processor_set.
+ * The selection criteria is:
+ *  - the set currently occupied by the least number of affinities
+ *    belonging to the owning the task.
+ * The caller must have the space locked.
+ */
+static void
+affinity_set_place(affinity_space_t aspc, affinity_set_t new_aset)
+{
+       unsigned int    num_cpu_asets = ml_get_max_affinity_sets();
+       unsigned int    set_occupancy[num_cpu_asets];
+       unsigned int    i;
+       unsigned int    i_least_occupied;
+       affinity_set_t  aset;
+
+       for (i = 0; i < num_cpu_asets; i++)
+               set_occupancy[i] = 0;
+
+       /*
+        * Scan the affinity sets calculating the number of sets
+        * occupy the available physical affinities.
+        */
+       queue_iterate(&aspc->aspc_affinities,
+                        aset, affinity_set_t, aset_affinities) {
+               set_occupancy[aset->aset_num]++;
+       }
+
+       /*
+        * Find the least occupied set (or the first empty set).
+        * To distribute placements somewhat, start searching from
+        * a cpu affinity chosen randomly per namespace:
+        *   [(unsigned int)aspc % 127] % num_cpu_asets
+        * unless this mapping policy is overridden.
+        */
+       if (affinity_sets_mapping == 0)
+               i_least_occupied = 0;
+       else
+               i_least_occupied = ((unsigned int)aspc % 127) % num_cpu_asets;
+       for (i = 0; i < num_cpu_asets; i++) {
+               unsigned int    j = (i_least_occupied + i) % num_cpu_asets;
+               if (set_occupancy[j] == 0) {
+                       i_least_occupied = j;
+                       break;
+               }
+               if (set_occupancy[j] < set_occupancy[i_least_occupied])
+                       i_least_occupied = j;
+       }
+       new_aset->aset_num = i_least_occupied;
+       new_aset->aset_pset = ml_affinity_to_pset(i_least_occupied);
+
+       /* Add the new affinity set to the group */
+       new_aset->aset_space = aspc;
+       queue_enter(&aspc->aspc_affinities,
+                       new_aset, affinity_set_t, aset_affinities);
+
+       DBG("affinity_set_place(%p,%p) selected affinity %u pset %p\n",
+           aspc, new_aset, new_aset->aset_num, new_aset->aset_pset);
+}