/*
 * Copyright (c) 2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <kern/affinity.h>
#include <kern/task.h>
#include <kern/kalloc.h>
#include <machine/cpu_affinity.h>

/*
 * Affinity involves two objects:
 * - affinity namespace:
 *   shared by a task family, this controls affinity tag lookup and
 *   allocation; it anchors all affinity sets in one namespace
 * - affinity set:
 *   anchors all member threads, which share an affinity tag in the
 *   owning namespace.
 *
 * Locking:
 * - The task lock protects the creation of an affinity namespace.
 * - The affinity namespace mutex protects the inheritance of a namespace
 *   and its thread membership. This includes its destruction when the task
 *   reference count goes to zero.
 * - The thread mutex protects a thread's affinity set membership, but in
 *   addition, the thread_lock is taken to write thread->affinity_set since
 *   this field (representing the active affinity set) is read by the
 *   scheduler.
 *
 * The lock ordering is: task lock, thread mutex, namespace mutex, thread lock.
 */
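
/*
 * As a sketch of that ordering (illustrative only, not an actual code
 * path in this file), a caller updating a thread's active affinity set
 * would nest the locks like this:
 *
 *	task_lock(task);                        // 1: task lock
 *	thread_mtx_lock(thread);                // 2: thread mutex
 *	lck_mtx_lock(&aspc->aspc_lock);         // 3: namespace mutex
 *	s = splsched();
 *	thread_lock(thread);                    // 4: thread lock (spinlock)
 *	thread->affinity_set = aset;            // scheduler-visible write
 *	thread_unlock(thread);
 *	splx(s);
 *	lck_mtx_unlock(&aspc->aspc_lock);
 *	thread_mtx_unlock(thread);
 *	task_unlock(task);
 */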

#if AFFINITY_DEBUG
#define DBG(x...) kprintf("DBG: " x)
#else
#define DBG(x...)
#endif

struct affinity_space {
	lck_mtx_t aspc_lock;
	uint32_t aspc_task_count;
	queue_head_t aspc_affinities;
};
typedef struct affinity_space *affinity_space_t;

static affinity_space_t affinity_space_alloc(void);
static void affinity_space_free(affinity_space_t aspc);
static affinity_set_t affinity_set_alloc(void);
static void affinity_set_free(affinity_set_t aset);
static affinity_set_t affinity_set_find(affinity_space_t aspc, uint32_t tag);
static void affinity_set_place(affinity_space_t aspc, affinity_set_t aset);
static void affinity_set_add(affinity_set_t aset, thread_t thread);
static affinity_set_t affinity_set_remove(affinity_set_t aset, thread_t thread);

/*
 * The following globals may be modified by the sysctls
 *   kern.affinity_sets_enabled - disables hinting if cleared
 *   kern.affinity_sets_mapping - controls cache distribution policy
 * See bsd/kern_sysctl.c
 *
 * Affinity sets are not used on embedded platforms, which typically
 * have only a single pset and where last-processor affinity matters
 * more than pset affinity.
 */
#if !defined(XNU_TARGET_OS_OSX)
boolean_t affinity_sets_enabled = FALSE;
int affinity_sets_mapping = 0;
#else /* !defined(XNU_TARGET_OS_OSX) */
boolean_t affinity_sets_enabled = TRUE;
int affinity_sets_mapping = 1;
#endif /* !defined(XNU_TARGET_OS_OSX) */
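
/*
 * For reference, these can be read from user space with sysctl(3).
 * A minimal user-space sketch (illustrative only; assumes the names
 * above are exported as noted in bsd/kern_sysctl.c):
 *
 *	#include <stdio.h>
 *	#include <sys/sysctl.h>
 *
 *	int enabled = 0;
 *	size_t len = sizeof(enabled);
 *	if (sysctlbyname("kern.affinity_sets_enabled",
 *	    &enabled, &len, NULL, 0) == 0)
 *		printf("affinity sets %s\n", enabled ? "enabled" : "disabled");
 */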

boolean_t
thread_affinity_is_supported(void)
{
	return ml_get_max_affinity_sets() != 0;
}


/*
 * thread_affinity_get()
 * Return the affinity tag for a thread.
 * Called with the thread mutex held.
 */
uint32_t
thread_affinity_get(thread_t thread)
{
	uint32_t tag;

	if (thread->affinity_set != NULL) {
		tag = thread->affinity_set->aset_tag;
	} else {
		tag = THREAD_AFFINITY_TAG_NULL;
	}

	return tag;
}


/*
 * thread_affinity_set()
 * Place a thread in an affinity set identified by a tag.
 * Called with thread referenced but not locked.
 */
kern_return_t
thread_affinity_set(thread_t thread, uint32_t tag)
{
	affinity_set_t aset;
	affinity_set_t empty_aset = NULL;
	affinity_space_t aspc;
	affinity_space_t new_aspc = NULL;

	DBG("thread_affinity_set(%p,%u)\n", thread, tag);

	task_lock(thread->task);
	aspc = thread->task->affinity_space;
	if (aspc == NULL) {
		task_unlock(thread->task);
		new_aspc = affinity_space_alloc();
		if (new_aspc == NULL) {
			return KERN_RESOURCE_SHORTAGE;
		}
		task_lock(thread->task);
		if (thread->task->affinity_space == NULL) {
			thread->task->affinity_space = new_aspc;
			new_aspc = NULL;
		}
		aspc = thread->task->affinity_space;
	}
	task_unlock(thread->task);
	if (new_aspc) {
		affinity_space_free(new_aspc);
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		/* Beaten to lock and the thread is dead */
		thread_mtx_unlock(thread);
		return KERN_TERMINATED;
	}

	lck_mtx_lock(&aspc->aspc_lock);
	aset = thread->affinity_set;
	if (aset != NULL) {
		/*
		 * Remove thread from current affinity set
		 */
		DBG("thread_affinity_set(%p,%u) removing from aset %p\n",
		    thread, tag, aset);
		empty_aset = affinity_set_remove(aset, thread);
	}

	if (tag != THREAD_AFFINITY_TAG_NULL) {
		aset = affinity_set_find(aspc, tag);
		if (aset != NULL) {
			/*
			 * Add thread to existing affinity set
			 */
			DBG("thread_affinity_set(%p,%u) found aset %p\n",
			    thread, tag, aset);
		} else {
			/*
			 * Use the new affinity set, add this thread
			 * and place it in a suitable processor set.
			 */
			if (empty_aset != NULL) {
				aset = empty_aset;
				empty_aset = NULL;
			} else {
				aset = affinity_set_alloc();
				if (aset == NULL) {
					lck_mtx_unlock(&aspc->aspc_lock);
					thread_mtx_unlock(thread);
					return KERN_RESOURCE_SHORTAGE;
				}
			}
			DBG("thread_affinity_set(%p,%u) (re-)using aset %p\n",
			    thread, tag, aset);
			aset->aset_tag = tag;
			affinity_set_place(aspc, aset);
		}
		affinity_set_add(aset, thread);
	}

	lck_mtx_unlock(&aspc->aspc_lock);
	thread_mtx_unlock(thread);

	/*
	 * If we wound up not using an empty aset we created,
	 * free it here.
	 */
	if (empty_aset != NULL) {
		affinity_set_free(empty_aset);
	}

	if (thread == current_thread()) {
		thread_block(THREAD_CONTINUE_NULL);
	}

	return KERN_SUCCESS;
}
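
/*
 * For context: user space reaches thread_affinity_set() via the
 * THREAD_AFFINITY_POLICY thread policy flavor. A minimal sketch
 * (error handling omitted; the tag value 1 is an arbitrary example):
 *
 *	#include <mach/mach.h>
 *	#include <mach/thread_policy.h>
 *	#include <pthread.h>
 *
 *	thread_affinity_policy_data_t policy = { .affinity_tag = 1 };
 *	thread_policy_set(pthread_mach_thread_np(pthread_self()),
 *	    THREAD_AFFINITY_POLICY,
 *	    (thread_policy_t)&policy,
 *	    THREAD_AFFINITY_POLICY_COUNT);
 *
 * Threads of a task family that share a non-null tag are placed in the
 * same affinity set, hinting the scheduler to co-locate them on a
 * cache-sharing processor set.
 */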

/*
 * task_affinity_create()
 * Called from task create.
 */
void
task_affinity_create(task_t parent_task, task_t child_task)
{
	affinity_space_t aspc = parent_task->affinity_space;

	DBG("task_affinity_create(%p,%p)\n", parent_task, child_task);

	assert(aspc);

	/*
	 * Bump the task reference count on the shared namespace and
	 * give it to the child.
	 */
	lck_mtx_lock(&aspc->aspc_lock);
	aspc->aspc_task_count++;
	child_task->affinity_space = aspc;
	lck_mtx_unlock(&aspc->aspc_lock);
}

/*
 * task_affinity_deallocate()
 * Called from task_deallocate() when there's a namespace to dereference.
 */
void
task_affinity_deallocate(task_t task)
{
	affinity_space_t aspc = task->affinity_space;

	DBG("task_affinity_deallocate(%p) aspc %p task_count %d\n",
	    task, aspc, aspc->aspc_task_count);

	lck_mtx_lock(&aspc->aspc_lock);
	if (--(aspc->aspc_task_count) == 0) {
		assert(queue_empty(&aspc->aspc_affinities));
		lck_mtx_unlock(&aspc->aspc_lock);
		affinity_space_free(aspc);
	} else {
		lck_mtx_unlock(&aspc->aspc_lock);
	}
}

/*
 * task_affinity_info()
 * Return affinity tag info (number, min, max) for the task.
 *
 * Conditions: task is locked.
 */
kern_return_t
task_affinity_info(
	task_t task,
	task_info_t task_info_out,
	mach_msg_type_number_t *task_info_count)
{
	affinity_set_t aset;
	affinity_space_t aspc;
	task_affinity_tag_info_t info;

	*task_info_count = TASK_AFFINITY_TAG_INFO_COUNT;
	info = (task_affinity_tag_info_t) task_info_out;
	info->set_count = 0;
	info->task_count = 0;
	info->min = THREAD_AFFINITY_TAG_NULL;
	info->max = THREAD_AFFINITY_TAG_NULL;

	aspc = task->affinity_space;
	if (aspc) {
		lck_mtx_lock(&aspc->aspc_lock);
		queue_iterate(&aspc->aspc_affinities,
		    aset, affinity_set_t, aset_affinities) {
			info->set_count++;
			if (info->min == THREAD_AFFINITY_TAG_NULL ||
			    aset->aset_tag < (uint32_t) info->min) {
				info->min = aset->aset_tag;
			}
			if (info->max == THREAD_AFFINITY_TAG_NULL ||
			    aset->aset_tag > (uint32_t) info->max) {
				info->max = aset->aset_tag;
			}
		}
		info->task_count = aspc->aspc_task_count;
		lck_mtx_unlock(&aspc->aspc_lock);
	}
	return KERN_SUCCESS;
}
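
/*
 * For reference, this flavor is queried from user space through
 * task_info(). A minimal sketch (illustrative; assumes the
 * TASK_AFFINITY_TAG_INFO flavor from <mach/task_info.h>):
 *
 *	task_affinity_tag_info_data_t info;
 *	mach_msg_type_number_t count = TASK_AFFINITY_TAG_INFO_COUNT;
 *	if (task_info(mach_task_self(), TASK_AFFINITY_TAG_INFO,
 *	    (task_info_t)&info, &count) == KERN_SUCCESS)
 *		printf("%d set(s), tags [%d..%d], %d task(s)\n",
 *		    info.set_count, info.min, info.max, info.task_count);
 */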

/*
 * Called from thread_dup() during fork() with child's mutex held.
 * Set the child into the parent's affinity set.
 * Note the affinity space is shared.
 */
void
thread_affinity_dup(thread_t parent, thread_t child)
{
	affinity_set_t aset;
	affinity_space_t aspc;

	thread_mtx_lock(parent);
	aset = parent->affinity_set;
	DBG("thread_affinity_dup(%p,%p) aset %p\n", parent, child, aset);
	if (aset == NULL) {
		thread_mtx_unlock(parent);
		return;
	}

	aspc = aset->aset_space;
	assert(aspc == parent->task->affinity_space);
	assert(aspc == child->task->affinity_space);

	lck_mtx_lock(&aspc->aspc_lock);
	affinity_set_add(aset, child);
	lck_mtx_unlock(&aspc->aspc_lock);

	thread_mtx_unlock(parent);
}

/*
 * thread_affinity_terminate()
 * Remove thread from any affinity set.
 * Called with the thread mutex locked.
 */
void
thread_affinity_terminate(thread_t thread)
{
	affinity_set_t aset = thread->affinity_set;
	affinity_space_t aspc;

	DBG("thread_affinity_terminate(%p)\n", thread);

	aspc = aset->aset_space;
	lck_mtx_lock(&aspc->aspc_lock);
	if (affinity_set_remove(aset, thread)) {
		affinity_set_free(aset);
	}
	lck_mtx_unlock(&aspc->aspc_lock);
}

/*
 * thread_affinity_exec()
 * Called from execve() to cancel any current affinity - a new image implies
 * the calling thread terminates any expressed or inherited affinity.
 */
void
thread_affinity_exec(thread_t thread)
{
	if (thread->affinity_set != AFFINITY_SET_NULL) {
		thread_affinity_terminate(thread);
	}
}

/*
 * Create an empty affinity namespace data structure.
 */
static affinity_space_t
affinity_space_alloc(void)
{
	affinity_space_t aspc;

	aspc = (affinity_space_t) kalloc(sizeof(struct affinity_space));
	if (aspc == NULL) {
		return NULL;
	}

	lck_mtx_init(&aspc->aspc_lock, &task_lck_grp, &task_lck_attr);
	queue_init(&aspc->aspc_affinities);
	aspc->aspc_task_count = 1;

	DBG("affinity_space_create() returns %p\n", aspc);
	return aspc;
}

/*
 * Destroy the given empty affinity namespace data structure.
 */
static void
affinity_space_free(affinity_space_t aspc)
{
	assert(queue_empty(&aspc->aspc_affinities));

	lck_mtx_destroy(&aspc->aspc_lock, &task_lck_grp);
	DBG("affinity_space_free(%p)\n", aspc);
	kfree(aspc, sizeof(struct affinity_space));
}


/*
 * Create an empty affinity set data structure;
 * the caller enters it into the namespace's list via affinity_set_place().
 */
static affinity_set_t
affinity_set_alloc(void)
{
	affinity_set_t aset;

	aset = (affinity_set_t) kalloc(sizeof(struct affinity_set));
	if (aset == NULL) {
		return NULL;
	}

	aset->aset_thread_count = 0;
	queue_init(&aset->aset_affinities);
	queue_init(&aset->aset_threads);
	aset->aset_num = 0;
	aset->aset_pset = PROCESSOR_SET_NULL;
	aset->aset_space = NULL;

	DBG("affinity_set_create() returns %p\n", aset);
	return aset;
}

/*
 * Destroy the given empty affinity set data structure
 * once it has been removed from its namespace.
 */
static void
affinity_set_free(affinity_set_t aset)
{
	assert(queue_empty(&aset->aset_threads));

	DBG("affinity_set_free(%p)\n", aset);
	kfree(aset, sizeof(struct affinity_set));
}

/*
 * Add a thread to an affinity set.
 * The caller must have the thread mutex and space locked.
 */
static void
affinity_set_add(affinity_set_t aset, thread_t thread)
{
	spl_t s;

	DBG("affinity_set_add(%p,%p)\n", aset, thread);
	queue_enter(&aset->aset_threads,
	    thread, thread_t, affinity_threads);
	aset->aset_thread_count++;
	s = splsched();
	thread_lock(thread);
	thread->affinity_set = affinity_sets_enabled ? aset : NULL;
	thread_unlock(thread);
	splx(s);
}

/*
 * Remove a thread from an affinity set returning the set if now empty.
 * The caller must have the thread mutex and space locked.
 */
static affinity_set_t
affinity_set_remove(affinity_set_t aset, thread_t thread)
{
	spl_t s;

	s = splsched();
	thread_lock(thread);
	thread->affinity_set = NULL;
	thread_unlock(thread);
	splx(s);

	aset->aset_thread_count--;
	queue_remove(&aset->aset_threads,
	    thread, thread_t, affinity_threads);
	if (queue_empty(&aset->aset_threads)) {
		queue_remove(&aset->aset_space->aspc_affinities,
		    aset, affinity_set_t, aset_affinities);
		assert(aset->aset_thread_count == 0);
		aset->aset_tag = THREAD_AFFINITY_TAG_NULL;
		aset->aset_num = 0;
		aset->aset_pset = PROCESSOR_SET_NULL;
		aset->aset_space = NULL;
		DBG("affinity_set_remove(%p,%p) set now empty\n", aset, thread);
		return aset;
	} else {
		DBG("affinity_set_remove(%p,%p)\n", aset, thread);
		return NULL;
	}
}

/*
 * Find an affinity set in the parent task with the given affinity tag.
 * The caller must have the space locked.
 */
static affinity_set_t
affinity_set_find(affinity_space_t space, uint32_t tag)
{
	affinity_set_t aset;

	queue_iterate(&space->aspc_affinities,
	    aset, affinity_set_t, aset_affinities) {
		if (aset->aset_tag == tag) {
			DBG("affinity_set_find(%p,%u) finds %p\n",
			    space, tag, aset);
			return aset;
		}
	}
	DBG("affinity_set_find(%p,%u) not found\n", space, tag);
	return NULL;
}

/*
 * affinity_set_place() assigns an affinity set to a suitable processor_set.
 * The selection criterion is:
 * - the set currently occupied by the fewest affinities belonging
 *   to the owning task.
 * The caller must have the space locked.
 */
static void
affinity_set_place(affinity_space_t aspc, affinity_set_t new_aset)
{
	unsigned short set_occupancy[MAX_CPUS] = { 0 };
	unsigned num_cpu_asets = ml_get_max_affinity_sets();
	unsigned i_least_occupied;
	affinity_set_t aset;

	if (__improbable(num_cpu_asets > MAX_CPUS)) {
		// If this triggers then the array needs to be made bigger.
		panic("num_cpu_asets = %d > %d too big in %s\n", num_cpu_asets, MAX_CPUS, __FUNCTION__);
	}

	/*
	 * Scan the affinity sets, counting how many of them occupy
	 * each of the available physical affinities.
	 */
	queue_iterate(&aspc->aspc_affinities,
	    aset, affinity_set_t, aset_affinities) {
		if (aset->aset_num < num_cpu_asets) {
			set_occupancy[aset->aset_num]++;
		} else {
			panic("aset_num = %d in %s\n", aset->aset_num, __FUNCTION__);
		}
	}

	/*
	 * Find the least occupied set (or the first empty set).
	 * To distribute placements somewhat, start searching from
	 * a cpu affinity chosen pseudo-randomly per namespace:
	 *	((uintptr_t)aspc % 127) % num_cpu_asets
	 * unless this mapping policy is overridden.
	 */
	if (affinity_sets_mapping == 0) {
		i_least_occupied = 0;
	} else {
		i_least_occupied = (unsigned int)(((uintptr_t)aspc % 127) % num_cpu_asets);
	}
	for (unsigned i = 0; i < num_cpu_asets; i++) {
		unsigned int j = (i_least_occupied + i) % num_cpu_asets;
		if (set_occupancy[j] == 0) {
			i_least_occupied = j;
			break;
		}
		if (set_occupancy[j] < set_occupancy[i_least_occupied]) {
			i_least_occupied = j;
		}
	}
	new_aset->aset_num = i_least_occupied;
	new_aset->aset_pset = ml_affinity_to_pset(i_least_occupied);

	/* Add the new affinity set to the group */
	new_aset->aset_space = aspc;
	queue_enter(&aspc->aspc_affinities,
	    new_aset, affinity_set_t, aset_affinities);

	DBG("affinity_set_place(%p,%p) selected affinity %u pset %p\n",
	    aspc, new_aset, new_aset->aset_num, new_aset->aset_pset);
}
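
/*
 * Worked example of the placement policy above (illustrative numbers).
 * Suppose num_cpu_asets = 4, affinity_sets_mapping = 1, and the namespace
 * pointer hashes to a start index of 2, i.e.
 * ((uintptr_t)aspc % 127) % 4 == 2:
 *
 * - set_occupancy = {1, 2, 0, 1}: the scan visits index 2 first, finds it
 *   empty, and places the new set on affinity 2 immediately.
 * - set_occupancy = {2, 1, 3, 1}: no index is empty, so the scan visits
 *   2, 3, 0, 1 and keeps the first strictly smaller occupancy seen;
 *   index 3 (occupancy 1) wins, and the later tie at index 1 does not
 *   replace it.
 *
 * Starting from a per-namespace hash rather than from 0 spreads unrelated
 * task families across the available cache-affinity groups.
 */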