/*
 * Copyright (c) 2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kern/affinity.h>
#include <kern/task.h>
#include <kern/kalloc.h>
#include <machine/cpu_affinity.h>
/*
 * Affinity involves 2 objects:
 * - affinity namespace:
 *      shared by a task family, this controls affinity tag lookup and
 *      allocation; it anchors all affinity sets in one namespace
 * - affinity set:
 *      anchors all threads with membership of this affinity set
 *      and which share an affinity tag in the owning namespace.
 *
 * Locking:
 * - The task lock protects the creation of an affinity namespace.
 * - The affinity namespace mutex protects the inheritance of a namespace
 *   and its thread membership. This includes its destruction when the task
 *   reference count goes to zero.
 * - The thread mutex protects a thread's affinity set membership, but in
 *   addition, the thread_lock is taken to write thread->affinity_set since this
 *   field (representing the active affinity set) is read by the scheduler.
 *
 * The lock ordering is: task lock, thread mutex, namespace mutex, thread lock.
 */
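
/*
 * Illustrative only: the usual route into this module is the public
 * thread_policy_set() interface with THREAD_AFFINITY_POLICY, roughly:
 *
 *      thread_affinity_policy_data_t policy = { .affinity_tag = tag };
 *      thread_policy_set(mach_thread_self(), THREAD_AFFINITY_POLICY,
 *          (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
 *
 * which lands in thread_affinity_set() below.
 */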
#if AFFINITY_DEBUG
#define DBG(x...)       kprintf("DBG: " x)
#else
#define DBG(x...)
#endif
struct affinity_space {
        lck_mtx_t               aspc_lock;
        uint32_t                aspc_task_count;
        queue_head_t            aspc_affinities;
};
typedef struct affinity_space *affinity_space_t;
static affinity_space_t affinity_space_alloc(void);
static void affinity_space_free(affinity_space_t aspc);
static affinity_set_t affinity_set_alloc(void);
static void affinity_set_free(affinity_set_t aset);
static affinity_set_t affinity_set_find(affinity_space_t aspc, uint32_t tag);
static void affinity_set_place(affinity_space_t aspc, affinity_set_t aset);
static void affinity_set_add(affinity_set_t aset, thread_t thread);
static affinity_set_t affinity_set_remove(affinity_set_t aset, thread_t thread);
/*
 * The following globals may be modified by the sysctls
 *   kern.affinity_sets_enabled - disables hinting if cleared
 *   kern.affinity_sets_mapping - controls cache distribution policy
 * See bsd/kern_sysctl.c
 *
 * Affinity sets are not used on embedded, which typically only
 * has a single pset, and last-processor affinity is
 * more important than pset affinity.
 */
#if CONFIG_EMBEDDED
boolean_t       affinity_sets_enabled = FALSE;
int             affinity_sets_mapping = 0;
#else /* !CONFIG_EMBEDDED */
boolean_t       affinity_sets_enabled = TRUE;
int             affinity_sets_mapping = 1;
#endif /* !CONFIG_EMBEDDED */
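
/*
 * Illustrative only: these policies can be tuned from user space, e.g.
 *      sysctl -w kern.affinity_sets_enabled=0   # disable affinity hinting
 *      sysctl -w kern.affinity_sets_mapping=0   # always search from set 0
 */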
boolean_t
thread_affinity_is_supported(void)
{
        return ml_get_max_affinity_sets() != 0;
}
/*
 * thread_affinity_get()
 * Return the affinity tag for a thread.
 * Called with the thread mutex held.
 */
uint32_t
thread_affinity_get(thread_t thread)
{
        uint32_t tag;

        if (thread->affinity_set != NULL) {
                tag = thread->affinity_set->aset_tag;
        } else {
                tag = THREAD_AFFINITY_TAG_NULL;
        }

        return tag;
}
/*
 * thread_affinity_set()
 * Place a thread in an affinity set identified by a tag.
 * Called with thread referenced but not locked.
 */
kern_return_t
thread_affinity_set(thread_t thread, uint32_t tag)
{
        affinity_set_t          aset;
        affinity_set_t          empty_aset = NULL;
        affinity_space_t        aspc;
        affinity_space_t        new_aspc = NULL;

        DBG("thread_affinity_set(%p,%u)\n", thread, tag);

        task_lock(thread->task);
        aspc = thread->task->affinity_space;
        if (aspc == NULL) {
                task_unlock(thread->task);
                new_aspc = affinity_space_alloc();
                if (new_aspc == NULL) {
                        return KERN_RESOURCE_SHORTAGE;
                }
                task_lock(thread->task);
                if (thread->task->affinity_space == NULL) {
                        thread->task->affinity_space = new_aspc;
                        new_aspc = NULL;
                }
                aspc = thread->task->affinity_space;
        }
        task_unlock(thread->task);
        if (new_aspc != NULL) {
                affinity_space_free(new_aspc);
        }

        thread_mtx_lock(thread);
        if (!thread->active) {
                /* Beaten to lock and the thread is dead */
                thread_mtx_unlock(thread);
                return KERN_TERMINATED;
        }

        lck_mtx_lock(&aspc->aspc_lock);
        aset = thread->affinity_set;
        if (aset != NULL) {
                /*
                 * Remove thread from current affinity set
                 */
                DBG("thread_affinity_set(%p,%u) removing from aset %p\n",
                    thread, tag, aset);
                empty_aset = affinity_set_remove(aset, thread);
        }

        if (tag != THREAD_AFFINITY_TAG_NULL) {
                aset = affinity_set_find(aspc, tag);
                if (aset != NULL) {
                        /*
                         * Add thread to existing affinity set
                         */
                        DBG("thread_affinity_set(%p,%u) found aset %p\n",
                            thread, tag, aset);
                } else {
                        /*
                         * Use the new affinity set, add this thread
                         * and place it in a suitable processor set.
                         */
                        if (empty_aset != NULL) {
                                aset = empty_aset;
                                empty_aset = NULL;
                        } else {
                                aset = affinity_set_alloc();
                                if (aset == NULL) {
                                        lck_mtx_unlock(&aspc->aspc_lock);
                                        thread_mtx_unlock(thread);
                                        return KERN_RESOURCE_SHORTAGE;
                                }
                        }
                        DBG("thread_affinity_set(%p,%u) (re-)using aset %p\n",
                            thread, tag, aset);
                        aset->aset_tag = tag;
                        affinity_set_place(aspc, aset);
                }
                affinity_set_add(aset, thread);
        }

        lck_mtx_unlock(&aspc->aspc_lock);
        thread_mtx_unlock(thread);

        /*
         * If we wound up not using an empty aset we created,
         * free it.
         */
        if (empty_aset != NULL) {
                affinity_set_free(empty_aset);
        }
        if (thread == current_thread()) {
                thread_block(THREAD_CONTINUE_NULL);
        }

        return KERN_SUCCESS;
}
/*
 * task_affinity_create()
 * Called from task create.
 */
void
task_affinity_create(task_t parent_task, task_t child_task)
{
        affinity_space_t        aspc = parent_task->affinity_space;

        DBG("task_affinity_create(%p,%p)\n", parent_task, child_task);

        assert(aspc);

        /*
         * Bump the task reference count on the shared namespace and
         * give it to the child.
         */
        lck_mtx_lock(&aspc->aspc_lock);
        aspc->aspc_task_count++;
        child_task->affinity_space = aspc;
        lck_mtx_unlock(&aspc->aspc_lock);
}
/*
 * task_affinity_deallocate()
 * Called from task_deallocate() when there's a namespace to dereference.
 */
void
task_affinity_deallocate(task_t task)
{
        affinity_space_t        aspc = task->affinity_space;

        DBG("task_affinity_deallocate(%p) aspc %p task_count %d\n",
            task, aspc, aspc->aspc_task_count);

        lck_mtx_lock(&aspc->aspc_lock);
        if (--(aspc->aspc_task_count) == 0) {
                assert(queue_empty(&aspc->aspc_affinities));
                lck_mtx_unlock(&aspc->aspc_lock);
                affinity_space_free(aspc);
        } else {
                lck_mtx_unlock(&aspc->aspc_lock);
        }
}
/*
 * task_affinity_info()
 * Return affinity tag info (number, min, max) for the task.
 *
 * Conditions: task is locked.
 */
kern_return_t
task_affinity_info(
        task_t                  task,
        task_info_t             task_info_out,
        mach_msg_type_number_t  *task_info_count)
{
        affinity_set_t                  aset;
        affinity_space_t                aspc;
        task_affinity_tag_info_t        info;

        *task_info_count = TASK_AFFINITY_TAG_INFO_COUNT;
        info = (task_affinity_tag_info_t) task_info_out;
        info->set_count = 0;
        info->task_count = 0;
        info->min = THREAD_AFFINITY_TAG_NULL;
        info->max = THREAD_AFFINITY_TAG_NULL;

        aspc = task->affinity_space;
        if (aspc) {
                lck_mtx_lock(&aspc->aspc_lock);
                queue_iterate(&aspc->aspc_affinities,
                    aset, affinity_set_t, aset_affinities) {
                        info->set_count++;
                        if (info->min == THREAD_AFFINITY_TAG_NULL ||
                            aset->aset_tag < (uint32_t) info->min) {
                                info->min = aset->aset_tag;
                        }
                        if (info->max == THREAD_AFFINITY_TAG_NULL ||
                            aset->aset_tag > (uint32_t) info->max) {
                                info->max = aset->aset_tag;
                        }
                }
                info->task_count = aspc->aspc_task_count;
                lck_mtx_unlock(&aspc->aspc_lock);
        }
        return KERN_SUCCESS;
}
/*
 * thread_affinity_dup()
 * Called from thread_dup() during fork() with child's mutex held.
 * Set the child into the parent's affinity set.
 * Note the affinity space is shared.
 */
void
thread_affinity_dup(thread_t parent, thread_t child)
{
        affinity_set_t                  aset;
        affinity_space_t                aspc;

        thread_mtx_lock(parent);
        aset = parent->affinity_set;
        DBG("thread_affinity_dup(%p,%p) aset %p\n", parent, child, aset);
        if (aset == NULL) {
                thread_mtx_unlock(parent);
                return;
        }

        aspc = aset->aset_space;
        assert(aspc == parent->task->affinity_space);
        assert(aspc == child->task->affinity_space);

        lck_mtx_lock(&aspc->aspc_lock);
        affinity_set_add(aset, child);
        lck_mtx_unlock(&aspc->aspc_lock);

        thread_mtx_unlock(parent);
}
/*
 * thread_affinity_terminate()
 * Remove thread from any affinity set.
 * Called with the thread mutex locked.
 */
void
thread_affinity_terminate(thread_t thread)
{
        affinity_set_t          aset = thread->affinity_set;
        affinity_space_t        aspc;

        DBG("thread_affinity_terminate(%p)\n", thread);

        aspc = aset->aset_space;
        lck_mtx_lock(&aspc->aspc_lock);
        if (affinity_set_remove(aset, thread)) {
                affinity_set_free(aset);
        }
        lck_mtx_unlock(&aspc->aspc_lock);
}
/*
 * thread_affinity_exec()
 * Called from execve() to cancel any current affinity - a new image implies
 * the calling thread terminates any expressed or inherited affinity.
 */
void
thread_affinity_exec(thread_t thread)
{
        if (thread->affinity_set != AFFINITY_SET_NULL) {
                thread_affinity_terminate(thread);
        }
}
/*
 * Create an empty affinity namespace data structure.
 */
static affinity_space_t
affinity_space_alloc(void)
{
        affinity_space_t        aspc;

        aspc = (affinity_space_t) kalloc(sizeof(struct affinity_space));
        if (aspc == NULL) {
                return NULL;
        }

        lck_mtx_init(&aspc->aspc_lock, &task_lck_grp, &task_lck_attr);
        queue_init(&aspc->aspc_affinities);
        aspc->aspc_task_count = 1;

        DBG("affinity_space_create() returns %p\n", aspc);
        return aspc;
}
/*
 * Destroy the given empty affinity namespace data structure.
 */
static void
affinity_space_free(affinity_space_t aspc)
{
        assert(queue_empty(&aspc->aspc_affinities));

        lck_mtx_destroy(&aspc->aspc_lock, &task_lck_grp);
        DBG("affinity_space_free(%p)\n", aspc);
        kfree(aspc, sizeof(struct affinity_space));
}
/*
 * Create an empty affinity set data structure
 * entering it into a list anchored by the owning task.
 */
static affinity_set_t
affinity_set_alloc(void)
{
        affinity_set_t          aset;

        aset = (affinity_set_t) kalloc(sizeof(struct affinity_set));
        if (aset == NULL) {
                return NULL;
        }

        aset->aset_thread_count = 0;
        queue_init(&aset->aset_affinities);
        queue_init(&aset->aset_threads);
        aset->aset_num = 0;
        aset->aset_pset = PROCESSOR_SET_NULL;
        aset->aset_space = NULL;

        DBG("affinity_set_create() returns %p\n", aset);
        return aset;
}
/*
 * Destroy the given empty affinity set data structure
 * after removing it from the parent task.
 */
static void
affinity_set_free(affinity_set_t aset)
{
        assert(queue_empty(&aset->aset_threads));

        DBG("affinity_set_free(%p)\n", aset);
        kfree(aset, sizeof(struct affinity_set));
}
/*
 * Add a thread to an affinity set.
 * The caller must have the thread mutex and space locked.
 */
static void
affinity_set_add(affinity_set_t aset, thread_t thread)
{
        spl_t   s;

        DBG("affinity_set_add(%p,%p)\n", aset, thread);
        queue_enter(&aset->aset_threads,
            thread, thread_t, affinity_threads);
        aset->aset_thread_count++;
        s = splsched();
        thread_lock(thread);
        thread->affinity_set = affinity_sets_enabled ? aset : NULL;
        thread_unlock(thread);
        splx(s);
}
/*
 * Remove a thread from an affinity set returning the set if now empty.
 * The caller must have the thread mutex and space locked.
 */
static affinity_set_t
affinity_set_remove(affinity_set_t aset, thread_t thread)
{
        spl_t   s;

        s = splsched();
        thread_lock(thread);
        thread->affinity_set = NULL;
        thread_unlock(thread);
        splx(s);

        aset->aset_thread_count--;
        queue_remove(&aset->aset_threads,
            thread, thread_t, affinity_threads);
        if (queue_empty(&aset->aset_threads)) {
                queue_remove(&aset->aset_space->aspc_affinities,
                    aset, affinity_set_t, aset_affinities);
                assert(aset->aset_thread_count == 0);
                aset->aset_tag = THREAD_AFFINITY_TAG_NULL;
                aset->aset_num = 0;
                aset->aset_pset = PROCESSOR_SET_NULL;
                aset->aset_space = NULL;
                DBG("affinity_set_remove(%p,%p) set now empty\n", aset, thread);
                return aset;
        } else {
                DBG("affinity_set_remove(%p,%p)\n", aset, thread);
                return NULL;
        }
}
/*
 * Find an affinity set in the parent task with the given affinity tag.
 * The caller must have the space locked.
 */
static affinity_set_t
affinity_set_find(affinity_space_t space, uint32_t tag)
{
        affinity_set_t          aset;

        queue_iterate(&space->aspc_affinities,
            aset, affinity_set_t, aset_affinities) {
                if (aset->aset_tag == tag) {
                        DBG("affinity_set_find(%p,%u) finds %p\n",
                            space, tag, aset);
                        return aset;
                }
        }
        DBG("affinity_set_find(%p,%u) not found\n", space, tag);
        return NULL;
}
/*
 * affinity_set_place() assigns an affinity set to a suitable processor_set.
 * The selection criterion is:
 *  - the set currently occupied by the least number of affinities
 *    belonging to the owning task.
 * The caller must have the space locked.
 */
static void
affinity_set_place(affinity_space_t aspc, affinity_set_t new_aset)
{
        unsigned int    num_cpu_asets = ml_get_max_affinity_sets();
        unsigned int    set_occupancy[num_cpu_asets];
        unsigned int    i;
        unsigned int    i_least_occupied;
        affinity_set_t  aset;

        for (i = 0; i < num_cpu_asets; i++) {
                set_occupancy[i] = 0;
        }

        /*
         * Scan the affinity sets counting how many sets
         * occupy each of the available physical affinities.
         */
        queue_iterate(&aspc->aspc_affinities,
            aset, affinity_set_t, aset_affinities) {
                if (aset->aset_num < num_cpu_asets) {
                        set_occupancy[aset->aset_num]++;
                } else {
                        panic("aset_num = %d in %s\n", aset->aset_num, __FUNCTION__);
                }
        }

        /*
         * Find the least occupied set (or the first empty set).
         * To distribute placements somewhat, start searching from
         * a cpu affinity chosen randomly per namespace:
         *   [(unsigned int)aspc % 127] % num_cpu_asets
         * unless this mapping policy is overridden.
         */
        if (affinity_sets_mapping == 0) {
                i_least_occupied = 0;
        } else {
                i_least_occupied = (unsigned int)(((uintptr_t)aspc % 127) % num_cpu_asets);
        }
        for (i = 0; i < num_cpu_asets; i++) {
                unsigned int    j = (i_least_occupied + i) % num_cpu_asets;
                if (set_occupancy[j] == 0) {
                        i_least_occupied = j;
                        break;
                }
                if (set_occupancy[j] < set_occupancy[i_least_occupied]) {
                        i_least_occupied = j;
                }
        }
        new_aset->aset_num = i_least_occupied;
        new_aset->aset_pset = ml_affinity_to_pset(i_least_occupied);

        /* Add the new affinity set to the group */
        new_aset->aset_space = aspc;
        queue_enter(&aspc->aspc_affinities,
            new_aset, affinity_set_t, aset_affinities);

        DBG("affinity_set_place(%p,%p) selected affinity %u pset %p\n",
            aspc, new_aset, new_aset->aset_num, new_aset->aset_pset);
}