2 * Copyright (c) 2007 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <kern/affinity.h>
30 #include <kern/task.h>
31 #include <kern/kalloc.h>
32 #include <machine/cpu_affinity.h>
35 * Affinity involves 2 objects:
36 * - affinity namespace:
37 * shared by a task family, this controls affinity tag lookup and
38 * allocation; it anchors all affinity sets in one namespace
40 * anchors all threads with membership of this affinity set
41 * and which share an affinity tag in the owning namespace.
44 * - The task lock protects the creation of an affinity namespace.
45 * - The affinity namespace mutex protects the inheritance of a namespace
46 * and its thread membership. This includes its destruction when the task
47 * reference count goes to zero.
48 * - The thread mutex protects a thread's affinity set membership, but in
49 * addition, the thread_lock is taken to write thread->affinity_set since this
50 * field (representing the active affinity set) is read by the scheduler.
52 * The lock ordering is: task lock, thread mutex, namespace mutex, thread lock.
/* Debug trace helper: forwards to kprintf() with a "DBG: " prefix on the format. */
#define DBG(...)        kprintf("DBG: " __VA_ARGS__)
61 struct affinity_space
{
63 uint32_t aspc_task_count
;
64 queue_head_t aspc_affinities
;
66 typedef struct affinity_space
*affinity_space_t
;
68 static affinity_space_t
affinity_space_alloc(void);
69 static void affinity_space_free(affinity_space_t aspc
);
70 static affinity_set_t
affinity_set_alloc(void);
71 static void affinity_set_free(affinity_set_t aset
);
72 static affinity_set_t
affinity_set_find(affinity_space_t aspc
, uint32_t tag
);
73 static void affinity_set_place(affinity_space_t aspc
, affinity_set_t aset
);
74 static void affinity_set_add(affinity_set_t aset
, thread_t thread
);
75 static affinity_set_t
affinity_set_remove(affinity_set_t aset
, thread_t thread
);
78 * The following globals may be modified by the sysctls
79 * kern.affinity_sets_enabled - disables hinting if cleared
80 * kern.affinity_sets_mapping - controls cache distribution policy
81 * See bsd/kern_sysctl.c
83 * Affinity sets are not used on embedded, which typically only
84 * has a single pset, and last-processor affinity is
85 * more important than pset affinity.
87 #if !defined(XNU_TARGET_OS_OSX)
88 boolean_t affinity_sets_enabled
= FALSE
;
89 int affinity_sets_mapping
= 0;
90 #else /* !defined(XNU_TARGET_OS_OSX) */
91 boolean_t affinity_sets_enabled
= TRUE
;
92 int affinity_sets_mapping
= 1;
93 #endif /* !defined(XNU_TARGET_OS_OSX) */
96 thread_affinity_is_supported(void)
98 return ml_get_max_affinity_sets() != 0;
103 * thread_affinity_get()
104 * Return the affinity tag for a thread.
105 * Called with the thread mutex held.
108 thread_affinity_get(thread_t thread
)
112 if (thread
->affinity_set
!= NULL
) {
113 tag
= thread
->affinity_set
->aset_tag
;
115 tag
= THREAD_AFFINITY_TAG_NULL
;
123 * thread_affinity_set()
124 * Place a thread in an affinity set identified by a tag.
125 * Called with thread referenced but not locked.
128 thread_affinity_set(thread_t thread
, uint32_t tag
)
131 affinity_set_t empty_aset
= NULL
;
132 affinity_space_t aspc
;
133 affinity_space_t new_aspc
= NULL
;
135 DBG("thread_affinity_set(%p,%u)\n", thread
, tag
);
137 task_lock(thread
->task
);
138 aspc
= thread
->task
->affinity_space
;
140 task_unlock(thread
->task
);
141 new_aspc
= affinity_space_alloc();
142 if (new_aspc
== NULL
) {
143 return KERN_RESOURCE_SHORTAGE
;
145 task_lock(thread
->task
);
146 if (thread
->task
->affinity_space
== NULL
) {
147 thread
->task
->affinity_space
= new_aspc
;
150 aspc
= thread
->task
->affinity_space
;
152 task_unlock(thread
->task
);
154 affinity_space_free(new_aspc
);
157 thread_mtx_lock(thread
);
158 if (!thread
->active
) {
159 /* Beaten to lock and the thread is dead */
160 thread_mtx_unlock(thread
);
161 return KERN_TERMINATED
;
164 lck_mtx_lock(&aspc
->aspc_lock
);
165 aset
= thread
->affinity_set
;
168 * Remove thread from current affinity set
170 DBG("thread_affinity_set(%p,%u) removing from aset %p\n",
172 empty_aset
= affinity_set_remove(aset
, thread
);
175 if (tag
!= THREAD_AFFINITY_TAG_NULL
) {
176 aset
= affinity_set_find(aspc
, tag
);
179 * Add thread to existing affinity set
181 DBG("thread_affinity_set(%p,%u) found aset %p\n",
185 * Use the new affinity set, add this thread
186 * and place it in a suitable processor set.
188 if (empty_aset
!= NULL
) {
192 aset
= affinity_set_alloc();
194 lck_mtx_unlock(&aspc
->aspc_lock
);
195 thread_mtx_unlock(thread
);
196 return KERN_RESOURCE_SHORTAGE
;
199 DBG("thread_affinity_set(%p,%u) (re-)using aset %p\n",
201 aset
->aset_tag
= tag
;
202 affinity_set_place(aspc
, aset
);
204 affinity_set_add(aset
, thread
);
207 lck_mtx_unlock(&aspc
->aspc_lock
);
208 thread_mtx_unlock(thread
);
211 * If we wound up not using an empty aset we created,
214 if (empty_aset
!= NULL
) {
215 affinity_set_free(empty_aset
);
218 if (thread
== current_thread()) {
219 thread_block(THREAD_CONTINUE_NULL
);
226 * task_affinity_create()
227 * Called from task create.
230 task_affinity_create(task_t parent_task
, task_t child_task
)
232 affinity_space_t aspc
= parent_task
->affinity_space
;
234 DBG("task_affinity_create(%p,%p)\n", parent_task
, child_task
);
239 * Bump the task reference count on the shared namespace and
240 * give it to the child.
242 lck_mtx_lock(&aspc
->aspc_lock
);
243 aspc
->aspc_task_count
++;
244 child_task
->affinity_space
= aspc
;
245 lck_mtx_unlock(&aspc
->aspc_lock
);
249 * task_affinity_deallocate()
250 * Called from task_deallocate() when there's a namespace to dereference.
253 task_affinity_deallocate(task_t task
)
255 affinity_space_t aspc
= task
->affinity_space
;
257 DBG("task_affinity_deallocate(%p) aspc %p task_count %d\n",
258 task
, aspc
, aspc
->aspc_task_count
);
260 lck_mtx_lock(&aspc
->aspc_lock
);
261 if (--(aspc
->aspc_task_count
) == 0) {
262 assert(queue_empty(&aspc
->aspc_affinities
));
263 lck_mtx_unlock(&aspc
->aspc_lock
);
264 affinity_space_free(aspc
);
266 lck_mtx_unlock(&aspc
->aspc_lock
);
271 * task_affinity_info()
272 * Return affinity tag info (number, min, max) for the task.
274 * Conditions: task is locked.
279 task_info_t task_info_out
,
280 mach_msg_type_number_t
*task_info_count
)
283 affinity_space_t aspc
;
284 task_affinity_tag_info_t info
;
286 *task_info_count
= TASK_AFFINITY_TAG_INFO_COUNT
;
287 info
= (task_affinity_tag_info_t
) task_info_out
;
289 info
->task_count
= 0;
290 info
->min
= THREAD_AFFINITY_TAG_NULL
;
291 info
->max
= THREAD_AFFINITY_TAG_NULL
;
293 aspc
= task
->affinity_space
;
295 lck_mtx_lock(&aspc
->aspc_lock
);
296 queue_iterate(&aspc
->aspc_affinities
,
297 aset
, affinity_set_t
, aset_affinities
) {
299 if (info
->min
== THREAD_AFFINITY_TAG_NULL
||
300 aset
->aset_tag
< (uint32_t) info
->min
) {
301 info
->min
= aset
->aset_tag
;
303 if (info
->max
== THREAD_AFFINITY_TAG_NULL
||
304 aset
->aset_tag
> (uint32_t) info
->max
) {
305 info
->max
= aset
->aset_tag
;
308 info
->task_count
= aspc
->aspc_task_count
;
309 lck_mtx_unlock(&aspc
->aspc_lock
);
315 * Called from thread_dup() during fork() with child's mutex held.
316 * Set the child into the parent's affinity set.
317 * Note the affinity space is shared.
320 thread_affinity_dup(thread_t parent
, thread_t child
)
323 affinity_space_t aspc
;
325 thread_mtx_lock(parent
);
326 aset
= parent
->affinity_set
;
327 DBG("thread_affinity_dup(%p,%p) aset %p\n", parent
, child
, aset
);
329 thread_mtx_unlock(parent
);
333 aspc
= aset
->aset_space
;
334 assert(aspc
== parent
->task
->affinity_space
);
335 assert(aspc
== child
->task
->affinity_space
);
337 lck_mtx_lock(&aspc
->aspc_lock
);
338 affinity_set_add(aset
, child
);
339 lck_mtx_unlock(&aspc
->aspc_lock
);
341 thread_mtx_unlock(parent
);
345 * thread_affinity_terminate()
346 * Remove thread from any affinity set.
347 * Called with the thread mutex locked.
350 thread_affinity_terminate(thread_t thread
)
352 affinity_set_t aset
= thread
->affinity_set
;
353 affinity_space_t aspc
;
355 DBG("thread_affinity_terminate(%p)\n", thread
);
357 aspc
= aset
->aset_space
;
358 lck_mtx_lock(&aspc
->aspc_lock
);
359 if (affinity_set_remove(aset
, thread
)) {
360 affinity_set_free(aset
);
362 lck_mtx_unlock(&aspc
->aspc_lock
);
366 * thread_affinity_exec()
367 * Called from execve() to cancel any current affinity - a new image implies
368 * the calling thread terminates any expressed or inherited affinity.
371 thread_affinity_exec(thread_t thread
)
373 if (thread
->affinity_set
!= AFFINITY_SET_NULL
) {
374 thread_affinity_terminate(thread
);
379 * Create an empty affinity namespace data structure.
381 static affinity_space_t
382 affinity_space_alloc(void)
384 affinity_space_t aspc
;
386 aspc
= (affinity_space_t
) kalloc(sizeof(struct affinity_space
));
391 lck_mtx_init(&aspc
->aspc_lock
, &task_lck_grp
, &task_lck_attr
);
392 queue_init(&aspc
->aspc_affinities
);
393 aspc
->aspc_task_count
= 1;
395 DBG("affinity_space_create() returns %p\n", aspc
);
400 * Destroy the given empty affinity namespace data structure.
403 affinity_space_free(affinity_space_t aspc
)
405 assert(queue_empty(&aspc
->aspc_affinities
));
407 lck_mtx_destroy(&aspc
->aspc_lock
, &task_lck_grp
);
408 DBG("affinity_space_free(%p)\n", aspc
);
409 kfree(aspc
, sizeof(struct affinity_space
));
414 * Create an empty affinity set data structure
415 * entering it into a list anchored by the owning task.
417 static affinity_set_t
418 affinity_set_alloc(void)
422 aset
= (affinity_set_t
) kalloc(sizeof(struct affinity_set
));
427 aset
->aset_thread_count
= 0;
428 queue_init(&aset
->aset_affinities
);
429 queue_init(&aset
->aset_threads
);
431 aset
->aset_pset
= PROCESSOR_SET_NULL
;
432 aset
->aset_space
= NULL
;
434 DBG("affinity_set_create() returns %p\n", aset
);
439 * Destroy the given empty affinity set data structure
440 * after removing it from the parent task.
443 affinity_set_free(affinity_set_t aset
)
445 assert(queue_empty(&aset
->aset_threads
));
447 DBG("affinity_set_free(%p)\n", aset
);
448 kfree(aset
, sizeof(struct affinity_set
));
452 * Add a thread to an affinity set.
453 * The caller must have the thread mutex and space locked.
456 affinity_set_add(affinity_set_t aset
, thread_t thread
)
460 DBG("affinity_set_add(%p,%p)\n", aset
, thread
);
461 queue_enter(&aset
->aset_threads
,
462 thread
, thread_t
, affinity_threads
);
463 aset
->aset_thread_count
++;
466 thread
->affinity_set
= affinity_sets_enabled
? aset
: NULL
;
467 thread_unlock(thread
);
472 * Remove a thread from an affinity set returning the set if now empty.
473 * The caller must have the thread mutex and space locked.
475 static affinity_set_t
476 affinity_set_remove(affinity_set_t aset
, thread_t thread
)
482 thread
->affinity_set
= NULL
;
483 thread_unlock(thread
);
486 aset
->aset_thread_count
--;
487 queue_remove(&aset
->aset_threads
,
488 thread
, thread_t
, affinity_threads
);
489 if (queue_empty(&aset
->aset_threads
)) {
490 queue_remove(&aset
->aset_space
->aspc_affinities
,
491 aset
, affinity_set_t
, aset_affinities
);
492 assert(aset
->aset_thread_count
== 0);
493 aset
->aset_tag
= THREAD_AFFINITY_TAG_NULL
;
495 aset
->aset_pset
= PROCESSOR_SET_NULL
;
496 aset
->aset_space
= NULL
;
497 DBG("affinity_set_remove(%p,%p) set now empty\n", aset
, thread
);
500 DBG("affinity_set_remove(%p,%p)\n", aset
, thread
);
506 * Find an affinity set in the parent task with the given affinity tag.
507 * The caller must have the space locked.
509 static affinity_set_t
510 affinity_set_find(affinity_space_t space
, uint32_t tag
)
514 queue_iterate(&space
->aspc_affinities
,
515 aset
, affinity_set_t
, aset_affinities
) {
516 if (aset
->aset_tag
== tag
) {
517 DBG("affinity_set_find(%p,%u) finds %p\n",
522 DBG("affinity_set_find(%p,%u) not found\n", space
, tag
);
527 * affinity_set_place() assigns an affinity set to a suitable processor_set.
528 * The selection criteria is:
529 * - the set currently occupied by the least number of affinities
530 * belonging to the owning the task.
531 * The caller must have the space locked.
534 affinity_set_place(affinity_space_t aspc
, affinity_set_t new_aset
)
536 unsigned short set_occupancy
[MAX_CPUS
] = { 0 };
537 unsigned num_cpu_asets
= ml_get_max_affinity_sets();
538 unsigned i_least_occupied
;
541 if (__improbable(num_cpu_asets
> MAX_CPUS
)) {
542 // If this triggers then the array needs to be made bigger.
543 panic("num_cpu_asets = %d > %d too big in %s\n", num_cpu_asets
, MAX_CPUS
, __FUNCTION__
);
547 * Scan the affinity sets calculating the number of sets
548 * occupy the available physical affinities.
550 queue_iterate(&aspc
->aspc_affinities
,
551 aset
, affinity_set_t
, aset_affinities
) {
552 if (aset
->aset_num
< num_cpu_asets
) {
553 set_occupancy
[aset
->aset_num
]++;
555 panic("aset_num = %d in %s\n", aset
->aset_num
, __FUNCTION__
);
560 * Find the least occupied set (or the first empty set).
561 * To distribute placements somewhat, start searching from
562 * a cpu affinity chosen randomly per namespace:
563 * [(unsigned int)aspc % 127] % num_cpu_asets
564 * unless this mapping policy is overridden.
566 if (affinity_sets_mapping
== 0) {
567 i_least_occupied
= 0;
569 i_least_occupied
= (unsigned int)(((uintptr_t)aspc
% 127) % num_cpu_asets
);
571 for (unsigned i
= 0; i
< num_cpu_asets
; i
++) {
572 unsigned int j
= (i_least_occupied
+ i
) % num_cpu_asets
;
573 if (set_occupancy
[j
] == 0) {
574 i_least_occupied
= j
;
577 if (set_occupancy
[j
] < set_occupancy
[i_least_occupied
]) {
578 i_least_occupied
= j
;
581 new_aset
->aset_num
= i_least_occupied
;
582 new_aset
->aset_pset
= ml_affinity_to_pset(i_least_occupied
);
584 /* Add the new affinity set to the group */
585 new_aset
->aset_space
= aspc
;
586 queue_enter(&aspc
->aspc_affinities
,
587 new_aset
, affinity_set_t
, aset_affinities
);
589 DBG("affinity_set_place(%p,%p) selected affinity %u pset %p\n",
590 aspc
, new_aset
, new_aset
->aset_num
, new_aset
->aset_pset
);