1 /*
2 * Copyright (c) 2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/machine.h>
31
32 #include <machine/machine_routines.h>
33 #include <machine/sched_param.h>
34 #include <machine/machine_cpu.h>
35
36 #include <kern/kern_types.h>
37 #include <kern/debug.h>
38 #include <kern/mach_param.h>
39 #include <kern/machine.h>
40 #include <kern/misc_protos.h>
41 #include <kern/processor.h>
42 #include <kern/queue.h>
43 #include <kern/sched.h>
44 #include <kern/sched_prim.h>
45 #include <kern/task.h>
46 #include <kern/thread.h>
47
48 #include <sys/kdebug.h>
49
50 /*
51 * Theory Statement
52 *
53 * How does the task scheduler work?
54 *
55 * It schedules threads across a few levels.
56 *
57 * RT threads are dealt with above us
58 * Bound threads go into the per-processor runq
59 * Non-bound threads are linked on their task's sched_group's runq
60 * sched_groups' sched_entries are linked on the pset's runq
61 *
62 * TODO: make this explicit - bound threads should have a different enqueue fxn
63 *
64 * When we choose a new thread, we will decide whether to look at the bound runqueue, the global runqueue
65 * or the current group's runqueue, then dequeue the next thread in that runqueue.
66 *
67 * We then manipulate the sched_entries to reflect the invariant that:
68 * Each non-empty priority level in a group's runq is represented by one sched_entry enqueued in the global
69 * runqueue.
70 *
71 * A sched_entry represents a chance at running - for each priority in each task, there is one chance of getting
72 * to run. This reduces the excess contention bonus given to processes which have work spread among many threads
73 * as compared to processes which do the same amount of work under fewer threads.
74 *
75 * NOTE: Currently, the multiq scheduler only supports one pset.
76 *
77 * NOTE ABOUT thread->sched_pri:
78 *
79 * It can change after enqueue - it is changed without the pset lock, but with the thread lock, if thread->runq is 0.
80 * Therefore we can only depend on it not changing during the enqueue and remove paths, not during dequeue.
81 *
82 * TODO: Future features:
83 *
84 * Decouple the task priority from the sched_entry priority, allowing for:
85 * fast task priority change without having to iterate and re-dispatch all threads in the task.
86 * i.e. task-wide priority, task-wide boosting
87 * fancier group decay features
88 *
89 * Group (or task) decay:
90 * Decay is used for a few different things:
91 * Prioritizing latency-needing threads over throughput-needing threads for time-to-running
92 * Balancing work between threads in a process
93 * Balancing work done at the same priority between different processes
94 * Recovering from priority inversions between two threads in the same process
95 * Recovering from priority inversions between two threads in different processes
96 * Simulating a proportional share scheduler by allowing lower priority threads
97 * to run for a certain percentage of the time
98 *
99 * Task decay lets us separately address the 'same process' and 'different process' needs,
100 * which will allow us to make smarter tradeoffs in different cases.
101 * For example, we could resolve priority inversion in the same process by reordering threads without dropping the
102 * process below low priority threads in other processes.
103 *
104 * One lock to rule them all (or at least all the runqueues) instead of the pset locks
105 *
106 * Shrink sched_entry size to the size of a queue_chain_t by inferring priority, group, and perhaps runq field.
107 * The entries array is 5K currently so it'd be really great to reduce.
108 * One way to get sched_group below 4K without a new runq structure would be to remove the extra queues above realtime.
109 *
110 * When preempting a processor, store a flag saying if the preemption
111 * was from a thread in the same group or different group,
112 * and tell choose_thread about it.
113 *
114 * When choosing a processor, bias towards those running in the same
115 * group as I am running (at the same priority, or within a certain band?).
116 *
117 * Decide if we need to support psets.
118 * Decide how to support psets - do we need duplicate entries for each pset,
119 * or can we get away with putting the entry in either one or the other pset?
120 *
121 * Consider the right way to handle runq count - I don't want to iterate groups.
122 * Perhaps keep a global counter.
123 * Alternate option - remove it from choose_processor. It doesn't add much value
124 * now that we have global runq.
125 *
126 * Need a better way of finding group to target instead of looking at current_task.
127 * Perhaps choose_thread could pass in the current thread?
128 *
129 * Consider unifying runq copy-pastes.
130 *
131 * Thoughts on having a group central quantum bucket:
132 *
133 * I see two algorithms to decide quanta:
134 * A) Hand off only when switching thread to thread in the same group
135 * B) Allocate and return quanta to the group's pool
136 *
137 * Issues:
138 * If a task blocks completely, should it come back with its leftover quantum
139 * or a brand new quantum?
140 *
141 * Should I put a flag saying 'zero out the quantum you grab when you're dispatched'?
142 *
143 * Resolution:
144 * Handing off the quantum between threads will help with jumping around in the current task,
145 * but will not help when a thread from a different task is involved.
146 * Need an algorithm that works with round-robining between threads in different tasks.
147 *
148 * But wait - round-robining can only be triggered by quantum expiration or blocking.
149 * We need something that works with preemption or yielding - that's the more interesting idea.
150 *
151 * Existing algorithm - preemption doesn't reset the quantum, and puts the thread on the head of the runq.
152 * Blocking or quantum expiration does reset the quantum, and puts the thread on the tail of the runq.
153 *
154 * New algorithm -
155 * Hand off the quantum when hopping between threads with the same sched_group.
156 * Even if the thread was blocked, it uses the last thread's remaining quantum when it starts.
157 *
158 * If we use the 'only cycle the entry at quantum expiration' algorithm, then the quantum pool starts getting
159 * interesting.
160 *
161 * A thought - perhaps the handoff approach doesn't work so well in the presence of
162 * non-handoff wakeups, i.e. wake another thread, then wait, then block - that doesn't mean the
163 * woken thread will be what I switch to - another processor may have stolen it.
164 * What do we do there?
165 *
166 * Conclusions:
167 * We currently don't know of a scenario where quantum buckets on the task are beneficial.
168 * We will instead hand off the quantum between threads in the task, and keep the quantum
169 * on the preempted thread if it's preempted by something outside the task.
170 *
171 */
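
/*
 * A compressed sketch of the two enqueue paths described above, written in
 * terms of the helpers defined later in this file (illustrative only - the
 * real code path is sched_multiq_processor_enqueue()):
 */
#if 0 /* illustrative sketch, not compiled */
static void
multiq_enqueue_sketch(processor_t processor, thread_t thread, integer_t options)
{
	if (thread->bound_processor != PROCESSOR_NULL) {
		/* Bound threads bypass the group machinery and use the per-processor runq */
		run_queue_enqueue(multiq_bound_runq(processor), thread, options);
	} else {
		/*
		 * Non-bound threads go on their group's runq. If that priority level
		 * was previously empty, the group's sched_entry for that priority is
		 * enqueued on the pset-wide entry queue, preserving the invariant that
		 * each non-empty group priority level has exactly one entry in the
		 * global runqueue.
		 */
		sched_group_enqueue_thread(multiq_main_entryq(processor),
		    thread->sched_group, thread, options);
	}
	thread->runq = processor;
}
#endif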
172
173 #if DEBUG || DEVELOPMENT
174 #define MULTIQ_SANITY_CHECK
175 #endif
176
177 typedef struct sched_entry {
178 queue_chain_t entry_links;
179 int16_t sched_pri; /* scheduled (current) priority */
180 int16_t runq;
181 int32_t pad;
182 } *sched_entry_t;
183
184 typedef run_queue_t entry_queue_t; /* A run queue that holds sched_entries instead of threads */
185 typedef run_queue_t group_runq_t; /* A run queue that is part of a sched_group */
186
187 #define SCHED_ENTRY_NULL ((sched_entry_t) 0)
188 #define MULTIQ_ERUNQ (-4) /* Indicates entry is on the main runq */
189
190 /* Each level in the run queue corresponds to one entry in the entries array */
191 struct sched_group {
192 struct sched_entry entries[NRQS];
193 struct run_queue runq;
194 queue_chain_t sched_groups;
195 };
196
197 /*
198 * Keep entry on the head of the runqueue while dequeueing threads.
199 * Only cycle it to the end of the runqueue when a thread in the task
200 * hits its quantum.
201 */
202 static boolean_t deep_drain = FALSE;
203
204 /* Verify the consistency of the runq before touching it */
205 static boolean_t multiq_sanity_check = FALSE;
206
207 /*
208 * Draining threads from the current task is preferred
209 * when they're less than X steps below the current
210 * global highest priority
211 */
212 #define DEFAULT_DRAIN_BAND_LIMIT MAXPRI
213 static integer_t drain_band_limit;
214
215 /*
216 * Don't go below this priority level if there is something above it in another task
217 */
218 #define DEFAULT_DRAIN_DEPTH_LIMIT MAXPRI_THROTTLE
219 static integer_t drain_depth_limit;
220
221 /*
222 * Don't favor the task when there's something above this priority in another task.
223 */
224 #define DEFAULT_DRAIN_CEILING BASEPRI_FOREGROUND
225 static integer_t drain_ceiling;
226
227 static struct zone *sched_group_zone;
228
229 static uint64_t num_sched_groups = 0;
230 static queue_head_t sched_groups;
231
232 static lck_attr_t sched_groups_lock_attr;
233 static lck_grp_t sched_groups_lock_grp;
234 static lck_grp_attr_t sched_groups_lock_grp_attr;
235
236 static lck_mtx_t sched_groups_lock;
237
238
239 static void
240 sched_multiq_init(void);
241
242 static thread_t
243 sched_multiq_steal_thread(processor_set_t pset);
244
245 static void
246 sched_multiq_thread_update_scan(sched_update_scan_context_t scan_context);
247
248 static boolean_t
249 sched_multiq_processor_enqueue(processor_t processor, thread_t thread, integer_t options);
250
251 static boolean_t
252 sched_multiq_processor_queue_remove(processor_t processor, thread_t thread);
253
254 void
255 sched_multiq_quantum_expire(thread_t thread);
256
257 static ast_t
258 sched_multiq_processor_csw_check(processor_t processor);
259
260 static boolean_t
261 sched_multiq_processor_queue_has_priority(processor_t processor, int priority, boolean_t gte);
262
263 static int
264 sched_multiq_runq_count(processor_t processor);
265
266 static boolean_t
267 sched_multiq_processor_queue_empty(processor_t processor);
268
269 static uint64_t
270 sched_multiq_runq_stats_count_sum(processor_t processor);
271
272 static int
273 sched_multiq_processor_bound_count(processor_t processor);
274
275 static void
276 sched_multiq_pset_init(processor_set_t pset);
277
278 static void
279 sched_multiq_processor_init(processor_t processor);
280
281 static thread_t
282 sched_multiq_choose_thread(processor_t processor, int priority, ast_t reason);
283
284 static void
285 sched_multiq_processor_queue_shutdown(processor_t processor);
286
287 static sched_mode_t
288 sched_multiq_initial_thread_sched_mode(task_t parent_task);
289
290 const struct sched_dispatch_table sched_multiq_dispatch = {
291 .sched_name = "multiq",
292 .init = sched_multiq_init,
293 .timebase_init = sched_timeshare_timebase_init,
294 .processor_init = sched_multiq_processor_init,
295 .pset_init = sched_multiq_pset_init,
296 .maintenance_continuation = sched_timeshare_maintenance_continue,
297 .choose_thread = sched_multiq_choose_thread,
298 .steal_thread_enabled = FALSE,
299 .steal_thread = sched_multiq_steal_thread,
300 .compute_timeshare_priority = sched_compute_timeshare_priority,
301 .choose_processor = choose_processor,
302 .processor_enqueue = sched_multiq_processor_enqueue,
303 .processor_queue_shutdown = sched_multiq_processor_queue_shutdown,
304 .processor_queue_remove = sched_multiq_processor_queue_remove,
305 .processor_queue_empty = sched_multiq_processor_queue_empty,
306 .priority_is_urgent = priority_is_urgent,
307 .processor_csw_check = sched_multiq_processor_csw_check,
308 .processor_queue_has_priority = sched_multiq_processor_queue_has_priority,
309 .initial_quantum_size = sched_timeshare_initial_quantum_size,
310 .initial_thread_sched_mode = sched_multiq_initial_thread_sched_mode,
311 .can_update_priority = can_update_priority,
312 .update_priority = update_priority,
313 .lightweight_update_priority = lightweight_update_priority,
314 .quantum_expire = sched_multiq_quantum_expire,
315 .processor_runq_count = sched_multiq_runq_count,
316 .processor_runq_stats_count_sum = sched_multiq_runq_stats_count_sum,
317 .processor_bound_count = sched_multiq_processor_bound_count,
318 .thread_update_scan = sched_multiq_thread_update_scan,
319 .direct_dispatch_to_idle_processors = FALSE,
320 .multiple_psets_enabled = FALSE,
321 .sched_groups_enabled = TRUE,
322 .avoid_processor_enabled = FALSE,
323 .thread_avoid_processor = NULL,
324 .processor_balance = sched_SMT_balance,
325
326 .rt_runq = sched_rtglobal_runq,
327 .rt_init = sched_rtglobal_init,
328 .rt_queue_shutdown = sched_rtglobal_queue_shutdown,
329 .rt_runq_scan = sched_rtglobal_runq_scan,
330 .rt_runq_count_sum = sched_rtglobal_runq_count_sum,
331
332 .qos_max_parallelism = sched_qos_max_parallelism,
333 .check_spill = sched_check_spill,
334 .ipi_policy = sched_ipi_policy,
335 .thread_should_yield = sched_thread_should_yield,
336 };
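
/*
 * The rest of the kernel reaches this policy through the SCHED() indirection
 * macro rather than by calling these functions directly; for example
 * (a sketch, assuming the usual sched_current_dispatch pointer declared in
 * sched_prim.h):
 *
 *	SCHED(processor_enqueue)(processor, thread, options)
 *
 * expands to roughly
 *
 *	sched_current_dispatch->processor_enqueue(processor, thread, options)
 *
 * which lands in sched_multiq_processor_enqueue() when "multiq" is the
 * active scheduler.
 */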
337
338
339 static void
340 sched_multiq_init(void)
341 {
342 #if defined(MULTIQ_SANITY_CHECK)
343 PE_parse_boot_argn("-multiq-sanity-check", &multiq_sanity_check, sizeof(multiq_sanity_check));
344 #endif
345
346 PE_parse_boot_argn("-multiq-deep-drain", &deep_drain, sizeof(deep_drain));
347
348 if (!PE_parse_boot_argn("multiq_drain_ceiling", &drain_ceiling, sizeof(drain_ceiling))) {
349 drain_ceiling = DEFAULT_DRAIN_CEILING;
350 }
351
352 if (!PE_parse_boot_argn("multiq_drain_depth_limit", &drain_depth_limit, sizeof(drain_depth_limit))) {
353 drain_depth_limit = DEFAULT_DRAIN_DEPTH_LIMIT;
354 }
355
356 if (!PE_parse_boot_argn("multiq_drain_band_limit", &drain_band_limit, sizeof(drain_band_limit))) {
357 drain_band_limit = DEFAULT_DRAIN_BAND_LIMIT;
358 }
359
360 printf("multiq scheduler config: deep-drain %d, ceiling %d, depth limit %d, band limit %d, sanity check %d\n",
361 deep_drain, drain_ceiling, drain_depth_limit, drain_band_limit, multiq_sanity_check);
362
363 sched_group_zone = zinit(
364 sizeof(struct sched_group),
365 task_max * sizeof(struct sched_group),
366 PAGE_SIZE,
367 "sched groups");
368
369 zone_change(sched_group_zone, Z_NOENCRYPT, TRUE);
370 zone_change(sched_group_zone, Z_NOCALLOUT, TRUE);
371
372 queue_init(&sched_groups);
373
374 lck_grp_attr_setdefault(&sched_groups_lock_grp_attr);
375 lck_grp_init(&sched_groups_lock_grp, "sched_groups", &sched_groups_lock_grp_attr);
376 lck_attr_setdefault(&sched_groups_lock_attr);
377 lck_mtx_init(&sched_groups_lock, &sched_groups_lock_grp, &sched_groups_lock_attr);
378
379 sched_timeshare_init();
380 }
381
382 static void
383 sched_multiq_processor_init(processor_t processor)
384 {
385 run_queue_init(&processor->runq);
386 }
387
388 static void
389 sched_multiq_pset_init(processor_set_t pset)
390 {
391 run_queue_init(&pset->pset_runq);
392 }
393
394 static sched_mode_t
395 sched_multiq_initial_thread_sched_mode(task_t parent_task)
396 {
397 if (parent_task == kernel_task)
398 return TH_MODE_FIXED;
399 else
400 return TH_MODE_TIMESHARE;
401 }
402
403 sched_group_t
404 sched_group_create(void)
405 {
406 sched_group_t sched_group;
407
408 if (!SCHED(sched_groups_enabled))
409 return SCHED_GROUP_NULL;
410
411 sched_group = (sched_group_t)zalloc(sched_group_zone);
412
413 bzero(sched_group, sizeof(struct sched_group));
414
415 run_queue_init(&sched_group->runq);
416
417 for (int i = 0; i < NRQS; i++) {
418 sched_group->entries[i].runq = 0;
419 sched_group->entries[i].sched_pri = i;
420 }
421
422 lck_mtx_lock(&sched_groups_lock);
423 queue_enter(&sched_groups, sched_group, sched_group_t, sched_groups);
424 num_sched_groups++;
425 lck_mtx_unlock(&sched_groups_lock);
426
427 return (sched_group);
428 }
429
430 void
431 sched_group_destroy(sched_group_t sched_group)
432 {
433 if (!SCHED(sched_groups_enabled)) {
434 assert(sched_group == SCHED_GROUP_NULL);
435 return;
436 }
437
438 assert(sched_group != SCHED_GROUP_NULL);
439 assert(sched_group->runq.count == 0);
440
441 for (int i = 0; i < NRQS; i++) {
442 assert(sched_group->entries[i].runq == 0);
443 assert(sched_group->entries[i].sched_pri == i);
444 }
445
446 lck_mtx_lock(&sched_groups_lock);
447 queue_remove(&sched_groups, sched_group, sched_group_t, sched_groups);
448 num_sched_groups--;
449 lck_mtx_unlock(&sched_groups_lock);
450
451 zfree(sched_group_zone, sched_group);
452 }
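
/*
 * Sketch of the intended create/destroy lifecycle. The real call sites live
 * in the task lifecycle code outside this file; the field accesses below are
 * illustrative, not a definitive statement of those call sites:
 */
#if 0 /* illustrative sketch, not compiled */
static void
sched_group_lifecycle_sketch(task_t task, thread_t thread)
{
	/* at task creation */
	task->sched_group = sched_group_create();

	/* threads created in the task inherit the group */
	thread->sched_group = task->sched_group;

	/* at task teardown, once no threads remain runnable in the group */
	sched_group_destroy(task->sched_group);
	task->sched_group = SCHED_GROUP_NULL;
}
#endif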
453
454 __attribute__((always_inline))
455 static inline entry_queue_t
456 multiq_main_entryq(processor_t processor)
457 {
458 return (entry_queue_t)&processor->processor_set->pset_runq;
459 }
460
461 __attribute__((always_inline))
462 static inline run_queue_t
463 multiq_bound_runq(processor_t processor)
464 {
465 return &processor->runq;
466 }
467
468 __attribute__((always_inline))
469 static inline sched_entry_t
470 group_entry_for_pri(sched_group_t group, integer_t pri)
471 {
472 return &group->entries[pri];
473 }
474
475 __attribute__((always_inline))
476 static inline sched_group_t
477 group_for_entry(sched_entry_t entry)
478 {
479 #pragma clang diagnostic push
480 #pragma clang diagnostic ignored "-Wcast-align"
481 sched_group_t group = (sched_group_t)(entry - entry->sched_pri);
482 #pragma clang diagnostic pop
483 return group;
484 }
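
/*
 * group_for_entry() recovers the owning group from the entry pointer alone:
 * entry->sched_pri doubles as the entry's index in group->entries[], so
 * (entry - entry->sched_pri) points at entries[0], which coincides with the
 * start of the sched_group because entries[] is its first member. A sketch of
 * making those assumptions explicit (illustrative only):
 */
#if 0 /* illustrative sketch, not compiled */
_Static_assert(__builtin_offsetof(struct sched_group, entries) == 0,
    "group_for_entry() assumes entries[] is the first member of sched_group");

static void
group_for_entry_roundtrip_check(sched_entry_t entry)
{
	/* For any entry currently owned by a group, the mapping round-trips. */
	assert(group_entry_for_pri(group_for_entry(entry), entry->sched_pri) == entry);
}
#endif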
485
486 /* Peek at the head of the runqueue */
487 static sched_entry_t
488 entry_queue_first_entry(entry_queue_t rq)
489 {
490 assert(rq->count != 0);
491
492 queue_t queue = &rq->queues[rq->highq];
493
494 sched_entry_t entry = qe_queue_first(queue, struct sched_entry, entry_links);
495
496 assert(entry->sched_pri == rq->highq);
497
498 return entry;
499 }
500
501 #if defined(MULTIQ_SANITY_CHECK)
502
503 #if MACH_ASSERT
504 __attribute__((always_inline))
505 static inline boolean_t
506 queue_chain_linked(queue_chain_t* chain)
507 {
508 if (chain->next != NULL) {
509 assert(chain->prev != NULL);
510 return TRUE;
511 } else {
512 assert(chain->prev == NULL);
513 return FALSE;
514 }
515 }
516 #endif /* MACH_ASSERT */
517
518 static thread_t
519 group_first_thread(sched_group_t group)
520 {
521 group_runq_t rq = &group->runq;
522
523 assert(rq->count != 0);
524
525 queue_t queue = &rq->queues[rq->highq];
526
527 thread_t thread = qe_queue_first(queue, struct thread, runq_links);
528
529 assert(thread != THREAD_NULL);
530 assert_thread_magic(thread);
531
532 assert(thread->sched_group == group);
533
534 /* TODO: May not be safe */
535 assert(thread->sched_pri == rq->highq);
536
537 return thread;
538 }
539
540 /* Asserts if entry is not in entry runq at pri */
541 static void
542 entry_queue_check_entry(entry_queue_t runq, sched_entry_t entry, int expected_pri)
543 {
544 queue_t q;
545 sched_entry_t elem;
546
547 assert(queue_chain_linked(&entry->entry_links));
548 assert(entry->runq == MULTIQ_ERUNQ);
549
550 q = &runq->queues[expected_pri];
551
552 qe_foreach_element(elem, q, entry_links) {
553 if (elem == entry)
554 return;
555 }
556
557 panic("runq %p doesn't contain entry %p at pri %d", runq, entry, expected_pri);
558 }
559
560 /* Asserts if thread is not in group at its priority */
561 static void
562 sched_group_check_thread(sched_group_t group, thread_t thread)
563 {
564 queue_t q;
565 thread_t elem;
566 int pri = thread->sched_pri;
567
568 assert(thread->runq != PROCESSOR_NULL);
569
570 q = &group->runq.queues[pri];
571
572 qe_foreach_element(elem, q, runq_links) {
573 if (elem == thread)
574 return;
575 }
576
577 panic("group %p doesn't contain thread %p at pri %d", group, thread, pri);
578 }
579
580 static void
581 global_check_entry_queue(entry_queue_t main_entryq)
582 {
583 if (main_entryq->count == 0)
584 return;
585
586 sched_entry_t entry = entry_queue_first_entry(main_entryq);
587
588 assert(entry->runq == MULTIQ_ERUNQ);
589
590 sched_group_t group = group_for_entry(entry);
591
592 thread_t thread = group_first_thread(group);
593
594 __assert_only sched_entry_t thread_entry = group_entry_for_pri(thread->sched_group, thread->sched_pri);
595
596 assert(entry->sched_pri == group->runq.highq);
597
598 assert(entry == thread_entry);
599 assert(thread->runq != PROCESSOR_NULL);
600 }
601
602 static void
603 group_check_run_queue(entry_queue_t main_entryq, sched_group_t group)
604 {
605 if (group->runq.count == 0)
606 return;
607
608 thread_t thread = group_first_thread(group);
609
610 assert(thread->runq != PROCESSOR_NULL);
611
612 sched_entry_t sched_entry = group_entry_for_pri(thread->sched_group, thread->sched_pri);
613
614 entry_queue_check_entry(main_entryq, sched_entry, thread->sched_pri);
615
616 assert(sched_entry->sched_pri == thread->sched_pri);
617 assert(sched_entry->runq == MULTIQ_ERUNQ);
618 }
619
620 #endif /* defined(MULTIQ_SANITY_CHECK) */
621
622 /*
623 * The run queue must not be empty.
624 */
625 static sched_entry_t
626 entry_queue_dequeue_entry(entry_queue_t rq)
627 {
628 sched_entry_t sched_entry;
629 queue_t queue = &rq->queues[rq->highq];
630
631 assert(rq->count > 0);
632 assert(!queue_empty(queue));
633
634 sched_entry = qe_dequeue_head(queue, struct sched_entry, entry_links);
635
636 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
637 rq->count--;
638 if (SCHED(priority_is_urgent)(rq->highq)) {
639 rq->urgency--; assert(rq->urgency >= 0);
640 }
641 if (queue_empty(queue)) {
642 rq_bitmap_clear(rq->bitmap, rq->highq);
643 rq->highq = bitmap_first(rq->bitmap, NRQS);
644 }
645
646 sched_entry->runq = 0;
647
648 return (sched_entry);
649 }
650
651 /*
652 * The entry must not already be on a run queue; returns TRUE if it raises rq->highq.
653 */
654 static boolean_t
655 entry_queue_enqueue_entry(
656 entry_queue_t rq,
657 sched_entry_t entry,
658 integer_t options)
659 {
660 int sched_pri = entry->sched_pri;
661 queue_t queue = &rq->queues[sched_pri];
662 boolean_t result = FALSE;
663
664 assert(entry->runq == 0);
665
666 if (queue_empty(queue)) {
667 enqueue_tail(queue, &entry->entry_links);
668
669 rq_bitmap_set(rq->bitmap, sched_pri);
670 if (sched_pri > rq->highq) {
671 rq->highq = sched_pri;
672 result = TRUE;
673 }
674 } else {
675 if (options & SCHED_TAILQ)
676 enqueue_tail(queue, &entry->entry_links);
677 else
678 enqueue_head(queue, &entry->entry_links);
679 }
680 if (SCHED(priority_is_urgent)(sched_pri))
681 rq->urgency++;
682 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
683 rq->count++;
684
685 entry->runq = MULTIQ_ERUNQ;
686
687 return (result);
688 }
689
690 /*
691 * The entry must be in this runqueue.
692 */
693 static void
694 entry_queue_remove_entry(
695 entry_queue_t rq,
696 sched_entry_t entry)
697 {
698 int sched_pri = entry->sched_pri;
699
700 #if defined(MULTIQ_SANITY_CHECK)
701 if (multiq_sanity_check) {
702 entry_queue_check_entry(rq, entry, sched_pri);
703 }
704 #endif
705
706 remqueue(&entry->entry_links);
707
708 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
709 rq->count--;
710 if (SCHED(priority_is_urgent)(sched_pri)) {
711 rq->urgency--; assert(rq->urgency >= 0);
712 }
713
714 if (queue_empty(&rq->queues[sched_pri])) {
715 /* update run queue status */
716 rq_bitmap_clear(rq->bitmap, sched_pri);
717 rq->highq = bitmap_first(rq->bitmap, NRQS);
718 }
719
720 entry->runq = 0;
721 }
722
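/*
 * The entry must be in this runqueue.
 * Requeue it at the head or tail of its priority level without
 * changing the runqueue counts, urgency, or statistics.
 */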
723 static void
724 entry_queue_change_entry(
725 entry_queue_t rq,
726 sched_entry_t entry,
727 integer_t options)
728 {
729 int sched_pri = entry->sched_pri;
730 queue_t queue = &rq->queues[sched_pri];
731
732 #if defined(MULTIQ_SANITY_CHECK)
733 if (multiq_sanity_check) {
734 entry_queue_check_entry(rq, entry, sched_pri);
735 }
736 #endif
737
738 if (options & SCHED_TAILQ)
739 re_queue_tail(queue, &entry->entry_links);
740 else
741 re_queue_head(queue, &entry->entry_links);
742 }
743 /*
744 * The run queue must not be empty.
745 *
746 * sets queue_empty to TRUE if queue is now empty at thread_pri
747 */
748 static thread_t
749 group_run_queue_dequeue_thread(
750 group_runq_t rq,
751 integer_t *thread_pri,
752 boolean_t *queue_empty)
753 {
754 thread_t thread;
755 queue_t queue = &rq->queues[rq->highq];
756
757 assert(rq->count > 0);
758 assert(!queue_empty(queue));
759
760 *thread_pri = rq->highq;
761
762 thread = qe_dequeue_head(queue, struct thread, runq_links);
763 assert_thread_magic(thread);
764
765 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
766 rq->count--;
767 if (SCHED(priority_is_urgent)(rq->highq)) {
768 rq->urgency--; assert(rq->urgency >= 0);
769 }
770 if (queue_empty(queue)) {
771 rq_bitmap_clear(rq->bitmap, rq->highq);
772 rq->highq = bitmap_first(rq->bitmap, NRQS);
773 *queue_empty = TRUE;
774 } else {
775 *queue_empty = FALSE;
776 }
777
778 return thread;
779 }
780
781 /*
782 * The thread must not already be on a run queue.
783 * returns TRUE if the queue was empty at thread_pri
784 */
785 static boolean_t
786 group_run_queue_enqueue_thread(
787 group_runq_t rq,
788 thread_t thread,
789 integer_t thread_pri,
790 integer_t options)
791 {
792 queue_t queue = &rq->queues[thread_pri];
793 boolean_t result = FALSE;
794
795 assert(thread->runq == PROCESSOR_NULL);
796 assert_thread_magic(thread);
797
798 if (queue_empty(queue)) {
799 enqueue_tail(queue, &thread->runq_links);
800
801 rq_bitmap_set(rq->bitmap, thread_pri);
802 if (thread_pri > rq->highq) {
803 rq->highq = thread_pri;
804 }
805 result = TRUE;
806 } else {
807 if (options & SCHED_TAILQ)
808 enqueue_tail(queue, &thread->runq_links);
809 else
810 enqueue_head(queue, &thread->runq_links);
811 }
812 if (SCHED(priority_is_urgent)(thread_pri))
813 rq->urgency++;
814 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
815 rq->count++;
816
817 return (result);
818 }
819
820 /*
821 * The thread must be in this runqueue.
822 * returns TRUE if queue is now empty at thread_pri
823 */
824 static boolean_t
825 group_run_queue_remove_thread(
826 group_runq_t rq,
827 thread_t thread,
828 integer_t thread_pri)
829 {
830 boolean_t result = FALSE;
831
832 assert_thread_magic(thread);
833 assert(thread->runq != PROCESSOR_NULL);
834
835 remqueue(&thread->runq_links);
836
837 SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
838 rq->count--;
839 if (SCHED(priority_is_urgent)(thread_pri)) {
840 rq->urgency--; assert(rq->urgency >= 0);
841 }
842
843 if (queue_empty(&rq->queues[thread_pri])) {
844 /* update run queue status */
845 rq_bitmap_clear(rq->bitmap, thread_pri);
846 rq->highq = bitmap_first(rq->bitmap, NRQS);
847 result = TRUE;
848 }
849
850 thread->runq = PROCESSOR_NULL;
851
852 return result;
853 }
854
855 /*
856 * A thread's sched pri may change out from under us because
857 * we're clearing thread->runq here without the thread locked.
858 * Do not rely on it to be the same as when we enqueued.
859 */
860 static thread_t
861 sched_global_dequeue_thread(entry_queue_t main_entryq)
862 {
863 boolean_t pri_level_empty = FALSE;
864 sched_entry_t entry;
865 group_runq_t group_runq;
866 thread_t thread;
867 integer_t thread_pri;
868 sched_group_t group;
869
870 assert(main_entryq->count > 0);
871
872 entry = entry_queue_dequeue_entry(main_entryq);
873
874 group = group_for_entry(entry);
875 group_runq = &group->runq;
876
877 thread = group_run_queue_dequeue_thread(group_runq, &thread_pri, &pri_level_empty);
878
879 thread->runq = PROCESSOR_NULL;
880
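	/*
	 * Cycle this entry to the back of its priority level so that other
	 * groups with entries at the same priority get their 'one chance at
	 * running' before this group's next thread at this priority.
	 */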
881 if (!pri_level_empty) {
882 entry_queue_enqueue_entry(main_entryq, entry, SCHED_TAILQ);
883 }
884
885 return thread;
886 }
887
888 /* Dequeue a thread from the global runq without cycling its entry to the back of its priority level */
889 static thread_t
890 sched_global_deep_drain_dequeue_thread(entry_queue_t main_entryq)
891 {
892 boolean_t pri_level_empty = FALSE;
893 sched_entry_t entry;
894 group_runq_t group_runq;
895 thread_t thread;
896 integer_t thread_pri;
897 sched_group_t group;
898
899 assert(main_entryq->count > 0);
900
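	/*
	 * Deep drain: peek at the head entry rather than dequeueing it, so it
	 * stays at the head of the runqueue and we keep draining this group
	 * until the priority level empties or a thread hits its quantum.
	 */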
901 entry = entry_queue_first_entry(main_entryq);
902
903 group = group_for_entry(entry);
904 group_runq = &group->runq;
905
906 thread = group_run_queue_dequeue_thread(group_runq, &thread_pri, &pri_level_empty);
907
908 thread->runq = PROCESSOR_NULL;
909
910 if (pri_level_empty) {
911 entry_queue_remove_entry(main_entryq, entry);
912 }
913
914 return thread;
915 }
916
917
918 static thread_t
919 sched_group_dequeue_thread(
920 entry_queue_t main_entryq,
921 sched_group_t group)
922 {
923 group_runq_t group_runq = &group->runq;
924 boolean_t pri_level_empty = FALSE;
925 thread_t thread;
926 integer_t thread_pri;
927
928 thread = group_run_queue_dequeue_thread(group_runq, &thread_pri, &pri_level_empty);
929
930 thread->runq = PROCESSOR_NULL;
931
932 if (pri_level_empty) {
933 entry_queue_remove_entry(main_entryq, group_entry_for_pri(group, thread_pri));
934 }
935
936 return thread;
937 }
938
939 static void
940 sched_group_remove_thread(
941 entry_queue_t main_entryq,
942 sched_group_t group,
943 thread_t thread)
944 {
945 integer_t thread_pri = thread->sched_pri;
946 sched_entry_t sched_entry = group_entry_for_pri(group, thread_pri);
947
948 #if defined(MULTIQ_SANITY_CHECK)
949 if (multiq_sanity_check) {
950 global_check_entry_queue(main_entryq);
951 group_check_run_queue(main_entryq, group);
952
953 sched_group_check_thread(group, thread);
954 entry_queue_check_entry(main_entryq, sched_entry, thread_pri);
955 }
956 #endif
957
958 boolean_t pri_level_empty = group_run_queue_remove_thread(&group->runq, thread, thread_pri);
959
960 if (pri_level_empty) {
961 entry_queue_remove_entry(main_entryq, sched_entry);
962 }
963
964 #if defined(MULTIQ_SANITY_CHECK)
965 if (multiq_sanity_check) {
966 global_check_entry_queue(main_entryq);
967 group_check_run_queue(main_entryq, group);
968 }
969 #endif
970 }
971
972 static void
973 sched_group_enqueue_thread(
974 entry_queue_t main_entryq,
975 sched_group_t group,
976 thread_t thread,
977 integer_t options)
978 {
979 #if defined(MULTIQ_SANITY_CHECK)
980 if (multiq_sanity_check) {
981 global_check_entry_queue(main_entryq);
982 group_check_run_queue(main_entryq, group);
983 }
984 #endif
985
986 int sched_pri = thread->sched_pri;
987
988 boolean_t pri_level_was_empty = group_run_queue_enqueue_thread(&group->runq, thread, sched_pri, options);
989
990 if (pri_level_was_empty) {
991 /*
992 * TODO: Need to figure out if passing options here is a good idea or not
993 * What effects would it have?
994 */
995 entry_queue_enqueue_entry(main_entryq, &group->entries[sched_pri], options);
996 } else if (options & SCHED_HEADQ) {
997 /* The thread should be at the head of the line - move its entry to the front */
998 entry_queue_change_entry(main_entryq, &group->entries[sched_pri], options);
999 }
1000 }
1001
1002 /*
1003 * Locate a thread to execute from the run queue and return it.
1004 * Only choose a thread with greater or equal priority.
1005 *
1006 * pset is locked, thread is not locked.
1007 *
1008 * Returns THREAD_NULL if it cannot find a valid thread.
1009 *
1010 * Note: we cannot rely on the value of thread->sched_pri in this path because
1011 * we don't have the thread locked.
1012 *
1013 * TODO: Remove tracepoints
1014 */
1015 static thread_t
1016 sched_multiq_choose_thread(
1017 processor_t processor,
1018 int priority,
1019 ast_t reason)
1020 {
1021 entry_queue_t main_entryq = multiq_main_entryq(processor);
1022 run_queue_t bound_runq = multiq_bound_runq(processor);
1023
1024 boolean_t choose_bound_runq = FALSE;
1025
1026 if (bound_runq->highq < priority &&
1027 main_entryq->highq < priority)
1028 return THREAD_NULL;
1029
1030 if (bound_runq->count && main_entryq->count) {
1031 if (bound_runq->highq >= main_entryq->highq) {
1032 choose_bound_runq = TRUE;
1033 } else {
1034 /* Use main runq */
1035 }
1036 } else if (bound_runq->count) {
1037 choose_bound_runq = TRUE;
1038 } else if (main_entryq->count) {
1039 /* Use main runq */
1040 } else {
1041 return (THREAD_NULL);
1042 }
1043
1044 if (choose_bound_runq) {
1045 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1046 MACHDBG_CODE(DBG_MACH_SCHED, MACH_MULTIQ_DEQUEUE) | DBG_FUNC_NONE,
1047 MACH_MULTIQ_BOUND, main_entryq->highq, bound_runq->highq, 0, 0);
1048
1049 return run_queue_dequeue(bound_runq, SCHED_HEADQ);
1050 }
1051
1052 sched_group_t group = current_thread()->sched_group;
1053
1054 #if defined(MULTIQ_SANITY_CHECK)
1055 if (multiq_sanity_check) {
1056 global_check_entry_queue(main_entryq);
1057 group_check_run_queue(main_entryq, group);
1058 }
1059 #endif
1060
1061 /*
1062 * Determine if we should look at the group or the global queue
1063 *
1064 * TODO:
1065 * Perhaps pass reason as a 'should look inside' argument to choose_thread
1066 * Should YIELD AST override drain limit?
1067 */
1068 if (group->runq.count != 0 && (reason & AST_PREEMPTION) == 0) {
1069 boolean_t favor_group = TRUE;
1070
1071 integer_t global_pri = main_entryq->highq;
1072 integer_t group_pri = group->runq.highq;
1073
1074 /*
1075 * Favor the current group if the group is still the globally highest.
1076 *
1077 * Otherwise, consider choosing a thread from the current group
1078 * even if it's lower priority than the global highest priority.
1079 */
1080 if (global_pri > group_pri) {
1081 /*
1082 * If there's something elsewhere above the depth limit,
1083 * don't pick a thread below the limit.
1084 */
1085 if (global_pri > drain_depth_limit && group_pri <= drain_depth_limit)
1086 favor_group = FALSE;
1087
1088 /*
1089 * If there's something at or above the ceiling,
1090 * don't favor the group.
1091 */
1092 if (global_pri >= drain_ceiling)
1093 favor_group = FALSE;
1094
1095 /*
1096 * Don't go more than X steps below the global highest
1097 */
1098 if ((global_pri - group_pri) >= drain_band_limit)
1099 favor_group = FALSE;
1100 }
1101
1102 if (favor_group) {
1103 /* Pull from local runq */
1104 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1105 MACHDBG_CODE(DBG_MACH_SCHED, MACH_MULTIQ_DEQUEUE) | DBG_FUNC_NONE,
1106 MACH_MULTIQ_GROUP, global_pri, group_pri, 0, 0);
1107
1108 return sched_group_dequeue_thread(main_entryq, group);
1109 }
1110 }
1111
1112 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1113 MACHDBG_CODE(DBG_MACH_SCHED, MACH_MULTIQ_DEQUEUE) | DBG_FUNC_NONE,
1114 MACH_MULTIQ_GLOBAL, main_entryq->highq, group->runq.highq, 0, 0);
1115
1116 /* Couldn't pull from local runq, pull from global runq instead */
1117 if (deep_drain) {
1118 return sched_global_deep_drain_dequeue_thread(main_entryq);
1119 } else {
1120 return sched_global_dequeue_thread(main_entryq);
1121 }
1122 }
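
/*
 * The group-vs-global decision above, restated as a standalone predicate with
 * a worked example. Illustrative only - the real logic stays inline in
 * sched_multiq_choose_thread() above:
 */
#if 0 /* illustrative sketch, not compiled */
static boolean_t
should_favor_group_sketch(integer_t global_pri, integer_t group_pri)
{
	if (global_pri <= group_pri)
		return TRUE;    /* the group is still the globally highest */
	if (global_pri > drain_depth_limit && group_pri <= drain_depth_limit)
		return FALSE;   /* don't dig below the depth limit */
	if (global_pri >= drain_ceiling)
		return FALSE;   /* something important is waiting elsewhere */
	if ((global_pri - group_pri) >= drain_band_limit)
		return FALSE;   /* too far below the global highest */
	return TRUE;
}
/*
 * With the defaults (drain_ceiling = BASEPRI_FOREGROUND,
 * drain_depth_limit = MAXPRI_THROTTLE, drain_band_limit = MAXPRI):
 * a group thread a few steps below the global highest is still favored, but
 * as soon as another task has something at or above BASEPRI_FOREGROUND that
 * outranks the group's best, the global runqueue wins.
 */
#endif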
1123
1124
1125 /*
1126 * Thread must be locked, and not already be on a run queue.
1127 * pset is locked.
1128 */
1129 static boolean_t
1130 sched_multiq_processor_enqueue(
1131 processor_t processor,
1132 thread_t thread,
1133 integer_t options)
1134 {
1135 boolean_t result;
1136
1137 assert(processor == thread->chosen_processor);
1138
1139 if (thread->bound_processor != PROCESSOR_NULL) {
1140 assert(thread->bound_processor == processor);
1141
1142 result = run_queue_enqueue(multiq_bound_runq(processor), thread, options);
1143 thread->runq = processor;
1144
1145 return result;
1146 }
1147
1148 sched_group_enqueue_thread(multiq_main_entryq(processor),
1149 thread->sched_group,
1150 thread, options);
1151
1152 thread->runq = processor;
1153
1154 return (FALSE);
1155 }
1156
1157 /*
1158 * Called in the context of thread with thread and pset unlocked,
1159 * after updating thread priority but before propagating that priority
1160 * to the processor
1161 */
1162 void
1163 sched_multiq_quantum_expire(thread_t thread)
1164 {
1165 if (deep_drain) {
1166 /*
1167 * Move the entry at this priority to the end of the queue,
1168 * to allow the next task a shot at running.
1169 */
1170
1171 processor_t processor = thread->last_processor;
1172 processor_set_t pset = processor->processor_set;
1173 entry_queue_t entryq = multiq_main_entryq(processor);
1174
1175 pset_lock(pset);
1176
1177 sched_entry_t entry = group_entry_for_pri(thread->sched_group, processor->current_pri);
1178
1179 if (entry->runq == MULTIQ_ERUNQ) {
1180 entry_queue_change_entry(entryq, entry, SCHED_TAILQ);
1181 }
1182
1183 pset_unlock(pset);
1184 }
1185 }
1186
1187 static boolean_t
1188 sched_multiq_processor_queue_empty(processor_t processor)
1189 {
1190 return multiq_main_entryq(processor)->count == 0 &&
1191 multiq_bound_runq(processor)->count == 0;
1192 }
1193
1194 static ast_t
1195 sched_multiq_processor_csw_check(processor_t processor)
1196 {
1197 boolean_t has_higher;
1198 int pri;
1199
1200 entry_queue_t main_entryq = multiq_main_entryq(processor);
1201 run_queue_t bound_runq = multiq_bound_runq(processor);
1202
1203 assert(processor->active_thread != NULL);
1204
1205 pri = MAX(main_entryq->highq, bound_runq->highq);
1206
1207 if (processor->first_timeslice) {
1208 has_higher = (pri > processor->current_pri);
1209 } else {
1210 has_higher = (pri >= processor->current_pri);
1211 }
1212
1213 if (has_higher) {
1214 if (main_entryq->urgency > 0)
1215 return (AST_PREEMPT | AST_URGENT);
1216
1217 if (bound_runq->urgency > 0)
1218 return (AST_PREEMPT | AST_URGENT);
1219
1220 return AST_PREEMPT;
1221 }
1222
1223 return AST_NONE;
1224 }
1225
1226 static boolean_t
1227 sched_multiq_processor_queue_has_priority(
1228 processor_t processor,
1229 int priority,
1230 boolean_t gte)
1231 {
1232 run_queue_t main_runq = multiq_main_entryq(processor);
1233 run_queue_t bound_runq = multiq_bound_runq(processor);
1234
1235 int qpri = MAX(main_runq->highq, bound_runq->highq);
1236
1237 if (gte)
1238 return qpri >= priority;
1239 else
1240 return qpri > priority;
1241 }
1242
1243 static int
1244 sched_multiq_runq_count(processor_t processor)
1245 {
1246 /*
1247 * TODO: Decide whether to keep a count of runnable threads in the pset
1248 * or just return something less than the true count.
1249 *
1250 * This needs to be fast, so no iterating the whole runq.
1251 *
1252 * Another possible decision is to remove this - with global runq
1253 * it doesn't make much sense.
1254 */
1255 return multiq_main_entryq(processor)->count + multiq_bound_runq(processor)->count;
1256 }
1257
1258 static uint64_t
1259 sched_multiq_runq_stats_count_sum(processor_t processor)
1260 {
1261 /*
1262 * TODO: This one does need to go through all the runqueues, but it's only needed for
1263 * the sched stats tool
1264 */
1265
1266 uint64_t bound_sum = multiq_bound_runq(processor)->runq_stats.count_sum;
1267
1268 if (processor->cpu_id == processor->processor_set->cpu_set_low)
1269 return bound_sum + multiq_main_entryq(processor)->runq_stats.count_sum;
1270 else
1271 return bound_sum;
1272 }
1273
1274 static int
1275 sched_multiq_processor_bound_count(processor_t processor)
1276 {
1277 return multiq_bound_runq(processor)->count;
1278 }
1279
1280 static void
1281 sched_multiq_processor_queue_shutdown(processor_t processor)
1282 {
1283 processor_set_t pset = processor->processor_set;
1284 entry_queue_t main_entryq = multiq_main_entryq(processor);
1285 thread_t thread;
1286 queue_head_t tqueue;
1287
1288 /* We only need to migrate threads if this is the last active processor in the pset */
1289 if (pset->online_processor_count > 0) {
1290 pset_unlock(pset);
1291 return;
1292 }
1293
1294 queue_init(&tqueue);
1295
1296 /* Note that we do not remove bound threads from the queues here */
1297
1298 while (main_entryq->count > 0) {
1299 thread = sched_global_dequeue_thread(main_entryq);
1300 enqueue_tail(&tqueue, &thread->runq_links);
1301 }
1302
1303 pset_unlock(pset);
1304
1305 qe_foreach_element_safe(thread, &tqueue, runq_links) {
1306
1307 remqueue(&thread->runq_links);
1308
1309 thread_lock(thread);
1310
1311 thread_setrun(thread, SCHED_TAILQ);
1312
1313 thread_unlock(thread);
1314 }
1315 }
1316
1317 /*
1318 * Thread is locked
1319 *
1320 * This is why we can never read sched_pri unless we have the thread locked.
1321 * Which we do in the enqueue and remove cases, but not the dequeue case.
1322 */
1323 static boolean_t
1324 sched_multiq_processor_queue_remove(
1325 processor_t processor,
1326 thread_t thread)
1327 {
1328 boolean_t removed = FALSE;
1329 processor_set_t pset = processor->processor_set;
1330
1331 pset_lock(pset);
1332
1333 if (thread->runq != PROCESSOR_NULL) {
1334 /*
1335 * Thread is on a run queue and we have a lock on
1336 * that run queue.
1337 */
1338
1339 assert(thread->runq == processor);
1340
1341 if (thread->bound_processor != PROCESSOR_NULL) {
1342 assert(processor == thread->bound_processor);
1343 run_queue_remove(multiq_bound_runq(processor), thread);
1344 thread->runq = PROCESSOR_NULL;
1345 } else {
1346 sched_group_remove_thread(multiq_main_entryq(processor),
1347 thread->sched_group,
1348 thread);
1349 }
1350
1351 removed = TRUE;
1352 }
1353
1354 pset_unlock(pset);
1355
1356 return removed;
1357 }
1358
1359 /* pset is locked, returned unlocked */
1360 static thread_t
1361 sched_multiq_steal_thread(processor_set_t pset)
1362 {
1363 pset_unlock(pset);
1364 return (THREAD_NULL);
1365 }
1366
1367 /*
1368 * Scan the global queue for candidate groups, and scan those groups for
1369 * candidate threads.
1370 *
1371 * TODO: This iterates every group runq in its entirety for each entry it has in the runq, which is O(N^2)
1372 * Instead, iterate only the queue in the group runq matching the priority of the entry.
1373 *
1374 * Returns TRUE if retry is needed.
1375 */
1376 static boolean_t
1377 group_scan(entry_queue_t runq, sched_update_scan_context_t scan_context) {
1378 int count = runq->count;
1379 int queue_index;
1380
1381 assert(count >= 0);
1382
1383 if (count == 0)
1384 return FALSE;
1385
1386 for (queue_index = bitmap_first(runq->bitmap, NRQS);
1387 queue_index >= 0;
1388 queue_index = bitmap_next(runq->bitmap, queue_index)) {
1389
1390 sched_entry_t entry;
1391
1392 qe_foreach_element(entry, &runq->queues[queue_index], entry_links) {
1393 assert(count > 0);
1394
1395 sched_group_t group = group_for_entry(entry);
1396 if (group->runq.count > 0) {
1397 if (runq_scan(&group->runq, scan_context))
1398 return (TRUE);
1399 }
1400 count--;
1401 }
1402 }
1403
1404 return (FALSE);
1405 }
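
/*
 * One possible shape for the TODO above: an entry at queue_index corresponds
 * exactly to the threads at that priority in its group's runq, so the inner
 * scan only needs that one queue. runq_scan_priority() below is hypothetical -
 * no single-priority runq_scan variant exists today - so this is a sketch of
 * what the inner loop of group_scan() might become, not a drop-in change:
 */
#if 0 /* illustrative sketch, not compiled */
qe_foreach_element(entry, &runq->queues[queue_index], entry_links) {
	sched_group_t group = group_for_entry(entry);
	/* hypothetical helper: scan only the group's queue at this one priority */
	if (runq_scan_priority(&group->runq, queue_index, scan_context))
		return (TRUE);
	count--;
}
#endif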
1406
1407 static void
1408 sched_multiq_thread_update_scan(sched_update_scan_context_t scan_context)
1409 {
1410 boolean_t restart_needed = FALSE;
1411 processor_t processor = processor_list;
1412 processor_set_t pset;
1413 thread_t thread;
1414 spl_t s;
1415
1416 /*
1417 * We update the threads associated with each processor (bound and idle threads)
1418 * and then update the threads in each pset runqueue.
1419 */
1420
1421 do {
1422 do {
1423 pset = processor->processor_set;
1424
1425 s = splsched();
1426 pset_lock(pset);
1427
1428 restart_needed = runq_scan(multiq_bound_runq(processor), scan_context);
1429
1430 pset_unlock(pset);
1431 splx(s);
1432
1433 if (restart_needed)
1434 break;
1435
1436 thread = processor->idle_thread;
1437 if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) {
1438 if (thread_update_add_thread(thread) == FALSE) {
1439 restart_needed = TRUE;
1440 break;
1441 }
1442 }
1443 } while ((processor = processor->processor_list) != NULL);
1444
1445 /* Ok, we now have a collection of candidates -- fix them. */
1446 thread_update_process_threads();
1447
1448 } while (restart_needed);
1449
1450 pset = &pset0;
1451
1452 do {
1453 do {
1454 s = splsched();
1455 pset_lock(pset);
1456
1457 restart_needed = group_scan(&pset->pset_runq, scan_context);
1458
1459 pset_unlock(pset);
1460 splx(s);
1461
1462 if (restart_needed)
1463 break;
1464 } while ((pset = pset->pset_list) != NULL);
1465
1466 /* Ok, we now have a collection of candidates -- fix them. */
1467 thread_update_process_threads();
1468
1469 } while (restart_needed);
1470 }