osfmk/kern/sched_multiq.c

   1 /*
   2  * Copyright (c) 2013 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 #include <mach/mach_types.h>
  30 #include <mach/machine.h>
  31
  32 #include <machine/machine_routines.h>
  33 #include <machine/sched_param.h>
  34 #include <machine/machine_cpu.h>
  35
  36 #include <kern/kern_types.h>
  37 #include <kern/debug.h>
  38 #include <kern/mach_param.h>
  39 #include <kern/machine.h>
  40 #include <kern/misc_protos.h>
  41 #include <kern/processor.h>
  42 #include <kern/queue.h>
  43 #include <kern/sched.h>
  44 #include <kern/sched_prim.h>
  45 #include <kern/task.h>
  46 #include <kern/thread.h>
  47
  48 #include <sys/kdebug.h>
  49
  50 /*
  51  * Theory Statement
  52  *
  53  * How does the task scheduler work?
  54  *
  55  * It schedules threads across a few levels.
  56  *
  57  * RT threads are dealt with above us
  58  * Bound threads go into the per-processor runq
  59  * Non-bound threads are linked on their task's sched_group's runq
  60  * sched_groups' sched_entries are linked on the pset's runq
  61  *
  62  * TODO: make this explicit - bound threads should have a different enqueue fxn
  63  *
  64  * When we choose a new thread, we will decide whether to look at the bound runqueue, the global runqueue
  65  * or the current group's runqueue, then dequeue the next thread in that runqueue.
  66  *
  67  * We then manipulate the sched_entries to reflect the invariant that:
  68  * Each non-empty priority level in a group's runq is represented by one sched_entry enqueued in the global
  69  * runqueue.
  70  *
  71  * A sched_entry represents a chance at running - for each priority in each task, there is one chance of getting
  72  * to run.  This reduces the excess contention bonus given to processes which have work spread among many threads
  73  * as compared to processes which do the same amount of work under fewer threads.
  74  *
  75  * NOTE: Currently, the multiq scheduler only supports one pset.
  76  *
  77  * NOTE ABOUT thread->sched_pri:
  78  *
  79  * It can change after enqueue - it's changed without pset lock but with thread lock if thread->runq is 0.
  80  * Therefore we can only depend on it not changing during the enqueue and remove path, not the dequeue.
  81  *
  82  * TODO: Future features:
  83  *
  84  * Decouple the task priority from the sched_entry priority, allowing for:
  85  *      fast task priority change without having to iterate and re-dispatch all threads in the task.
  86  *              i.e. task-wide priority, task-wide boosting
  87  *      fancier group decay features
  88  *
  89  * Group (or task) decay:
  90  *      Decay is used for a few different things:
  91  *              Prioritizing latency-needing threads over throughput-needing threads for time-to-running
  92  *              Balancing work between threads in a process
  93  *              Balancing work done at the same priority between different processes
  94  *              Recovering from priority inversions between two threads in the same process
  95  *              Recovering from priority inversions between two threads in different processes
  96  *              Simulating a proportional share scheduler by allowing lower priority threads
  97  *                to run for a certain percentage of the time
  98  *
  99  *      Task decay lets us separately address the 'same process' and 'different process' needs,
 100  *      which will allow us to make smarter tradeoffs in different cases.
 101  *      For example, we could resolve priority inversion in the same process by reordering threads without dropping the
 102  *      process below low priority threads in other processes.
 103  *
 104  * One lock to rule them all (or at least all the runqueues) instead of the pset locks
 105  *
 106  * Shrink sched_entry size to the size of a queue_chain_t by inferring priority, group, and perhaps runq field.
 107  * The entries array is 5K currently so it'd be really great to reduce.
 108  * One way to get sched_group below 4K without a new runq structure would be to remove the extra queues above realtime.
 109  *
 110  * When preempting a processor, store a flag saying if the preemption
 111  * was from a thread in the same group or different group,
 112  * and tell choose_thread about it.
 113  *
 114  * When choosing a processor, bias towards those running in the same
 115  * group as I am running (at the same priority, or within a certain band?).
 116  *
 117  * Decide if we need to support psets.
 118  * Decide how to support psets - do we need duplicate entries for each pset,
 119  * or can we get away with putting the entry in either one or the other pset?
 120  *
 121  * Consider the right way to handle runq count - I don't want to iterate groups.
 122  * Perhaps keep a global counter.  sched_run_count will not work.
 123  * Alternate option - remove it from choose_processor. It doesn't add much value
 124  * now that we have global runq.
 125  *
 126  * Need a better way of finding group to target instead of looking at current_task.
 127  * Perhaps choose_thread could pass in the current thread?
 128  *
 129  * Consider unifying runq copy-pastes.
 130  *
 131  * Thoughts on having a group central quantum bucket:
 132  *
 133  * I see two algorithms to decide quanta:
 134  * A) Hand off only when switching thread to thread in the same group
 135  * B) Allocate and return quanta to the group's pool
 136  *
 137  * Issues:
 138  * If a task blocks completely, should it come back with the leftover quanta
 139  * or brand new quanta?
 140  *
 141  * Should I put a flag saying zero out a quanta you grab when youre dispatched'?
 142  *
 143  * Resolution:
 144  * Handing off quanta between threads will help with jumping around in the current task
 145  * but will not help when a thread from a different task is involved.
 146  * Need an algorithm that works with round robin-ing between threads in different tasks
 147  *
 148  * But wait - round robining can only be triggered by quantum expire or blocking.
 149  * We need something that works with preemption or yielding - that's the more interesting idea.
 150  *
 151  * Existing algorithm - preemption doesn't re-set quantum, puts thread on head of runq.
 152  * Blocking or quantum expiration does re-set quantum, puts thread on tail of runq.
 153  *
 154  * New algorithm -
 155  * Hand off quanta when hopping between threads with same sched_group
 156  * Even if thread was blocked it uses last thread remaining quanta when it starts.
 157  *
 158  * If we use the only cycle entry at quantum algorithm, then the quantum pool starts getting
 159  * interesting.
 160  *
 161  * A thought - perhaps the handoff approach doesn't work so well in the presence of
 162  * non-handoff wakeups i.e. wake other thread then wait then block - doesn't mean that
 163  * woken thread will be what I switch to - other processor may have stolen it.
 164  * What do we do there?
 165  *
 166  * Conclusions:
 167  * We currently don't know of a scenario where quantum buckets on the task is beneficial.
 168  * We will instead handoff quantum between threads in the task, and keep quantum
 169  * on the preempted thread if it's preempted by something outside the task.
 170  *
 171  */
 172
 173 #if DEBUG || DEVELOPMENT
 174 #define MULTIQ_SANITY_CHECK
 175 #endif
 176
 177 typedef struct sched_entry {
 178         queue_chain_t           links;
 179         int16_t                 sched_pri;      /* scheduled (current) priority */
 180         int16_t                 runq;
 181         int32_t                 pad;
 182 } *sched_entry_t;
 183
 184 typedef run_queue_t entry_queue_t;                      /* A run queue that holds sched_entries instead of threads */
 185 typedef run_queue_t group_runq_t;                       /* A run queue that is part of a sched_group */
 186
 187 #define SCHED_ENTRY_NULL        ((sched_entry_t) 0)
 188 #define MULTIQ_ERUNQ            (-4)                    /* Indicates entry is on the main runq */
 189
 190 /* Each level in the run queue corresponds to one entry in the entries array */
 191 struct sched_group {
 192         struct sched_entry      entries[NRQS];
 193         struct run_queue        runq;
 194         queue_chain_t           sched_groups;
 195 };
 196
 197 /* TODO: Turn this into an attribute in the sched dispatch struct */
 198 boolean_t               sched_groups_enabled = FALSE;
 199
 200 /*
 201  * Keep entry on the head of the runqueue while dequeueing threads.
 202  * Only cycle it to the end of the runqueue when a thread in the task
 203  * hits its quantum.
 204  */
 205 static boolean_t        deep_drain = FALSE;
 206
 207 /*
 208  * Don't favor the task when an urgent thread is present.
 209  */
 210 static boolean_t        drain_urgent_first = TRUE;
 211
 212 /* Verify the consistency of the runq before touching it */
 213 static boolean_t        multiq_sanity_check = FALSE;
 214
 215 /*
 216  * Draining threads from the current task is preferred
 217  * when they're less than X steps below the current
 218  * global highest priority
 219  */
 220 #define DEFAULT_DRAIN_BAND_LIMIT MAXPRI
 221 static integer_t        drain_band_limit;
 222
 223 /*
 224  * Don't go below this priority level if there is something above it in another task
 225  */
 226 #define DEFAULT_DRAIN_DEPTH_LIMIT MAXPRI_THROTTLE
 227 static integer_t        drain_depth_limit;
 228
 229
 230 static struct zone      *sched_group_zone;
 231
 232 static uint64_t         num_sched_groups = 0;
 233 static queue_head_t     sched_groups;
 234
 235 static lck_attr_t       sched_groups_lock_attr;
 236 static lck_grp_t        sched_groups_lock_grp;
 237 static lck_grp_attr_t   sched_groups_lock_grp_attr;
 238
 239 static lck_mtx_t        sched_groups_lock;
 240
 241
 242 static void
 243 sched_multiq_init(void);
 244
 245 static thread_t
 246 sched_multiq_steal_thread(processor_set_t pset);
 247
 248 static void
 249 sched_multiq_thread_update_scan(void);
 250
 251 static boolean_t
 252 sched_multiq_processor_enqueue(processor_t processor, thread_t thread, integer_t options);
 253
 254 static boolean_t
 255 sched_multiq_processor_queue_remove(processor_t processor, thread_t thread);
 256
 257 void
 258 sched_multiq_quantum_expire(thread_t thread);
 259
 260 static ast_t
 261 sched_multiq_processor_csw_check(processor_t processor);
 262
 263 static boolean_t
 264 sched_multiq_processor_queue_has_priority(processor_t processor, int priority, boolean_t gte);
 265
 266 static int
 267 sched_multiq_runq_count(processor_t processor);
 268
 269 static boolean_t
 270 sched_multiq_processor_queue_empty(processor_t processor);
 271
 272 static uint64_t
 273 sched_multiq_runq_stats_count_sum(processor_t processor);
 274
 275 static int
 276 sched_multiq_processor_bound_count(processor_t processor);
 277
 278 static void
 279 sched_multiq_pset_init(processor_set_t pset);
 280
 281 static void
 282 sched_multiq_processor_init(processor_t processor);
 283
 284 static thread_t
 285 sched_multiq_choose_thread(processor_t processor, int priority, ast_t reason);
 286
 287 static void
 288 sched_multiq_processor_queue_shutdown(processor_t processor);
 289
 290 static sched_mode_t
 291 sched_multiq_initial_thread_sched_mode(task_t parent_task);
 292
 293 static boolean_t
 294 sched_multiq_should_current_thread_rechoose_processor(processor_t processor);
 295
 296 const struct sched_dispatch_table sched_multiq_dispatch = {
 297         .init                                           = sched_multiq_init,
 298         .timebase_init                                  = sched_traditional_timebase_init,
 299         .processor_init                                 = sched_multiq_processor_init,
 300         .pset_init                                      = sched_multiq_pset_init,
 301         .maintenance_continuation                       = sched_traditional_maintenance_continue,
 302         .choose_thread                                  = sched_multiq_choose_thread,
 303         .steal_thread                                   = sched_multiq_steal_thread,
 304         .compute_priority                               = compute_priority,
 305         .choose_processor                               = choose_processor,
 306         .processor_enqueue                              = sched_multiq_processor_enqueue,
 307         .processor_queue_shutdown                       = sched_multiq_processor_queue_shutdown,
 308         .processor_queue_remove                         = sched_multiq_processor_queue_remove,
 309         .processor_queue_empty                          = sched_multiq_processor_queue_empty,
 310         .priority_is_urgent                             = priority_is_urgent,
 311         .processor_csw_check                            = sched_multiq_processor_csw_check,
 312         .processor_queue_has_priority                   = sched_multiq_processor_queue_has_priority,
 313         .initial_quantum_size                           = sched_traditional_initial_quantum_size,
 314         .initial_thread_sched_mode                      = sched_multiq_initial_thread_sched_mode,
 315         .can_update_priority                            = can_update_priority,
 316         .update_priority                                = update_priority,
 317         .lightweight_update_priority                    = lightweight_update_priority,
 318         .quantum_expire                                 = sched_multiq_quantum_expire,
 319         .should_current_thread_rechoose_processor       = sched_multiq_should_current_thread_rechoose_processor,
 320         .processor_runq_count                           = sched_multiq_runq_count,
 321         .processor_runq_stats_count_sum                 = sched_multiq_runq_stats_count_sum,
 322         .fairshare_init                                 = sched_traditional_fairshare_init,
 323         .fairshare_runq_count                           = sched_traditional_fairshare_runq_count,
 324         .fairshare_runq_stats_count_sum                 = sched_traditional_fairshare_runq_stats_count_sum,
 325         .fairshare_enqueue                              = sched_traditional_fairshare_enqueue,
 326         .fairshare_dequeue                              = sched_traditional_fairshare_dequeue,
 327         .fairshare_queue_remove                         = sched_traditional_fairshare_queue_remove,
 328         .processor_bound_count                          = sched_multiq_processor_bound_count,
 329         .thread_update_scan                             = sched_multiq_thread_update_scan,
 330         .direct_dispatch_to_idle_processors             = FALSE,
 331 };
 332
 333
 334 static void
 335 sched_multiq_init(void)
 336 {
 337         sched_groups_enabled = TRUE;
 338
 339 #if defined(MULTIQ_SANITY_CHECK)
 340         PE_parse_boot_argn("-multiq-sanity-check", &multiq_sanity_check, sizeof(multiq_sanity_check));
 341 #endif
 342
 343         PE_parse_boot_argn("-multiq-deep-drain", &deep_drain, sizeof(deep_drain));
 344
 345         PE_parse_boot_argn("multiq_drain_urgent_first", &drain_urgent_first, sizeof(drain_urgent_first));
 346
 347         if (!PE_parse_boot_argn("multiq_drain_depth_limit", &drain_depth_limit, sizeof(drain_depth_limit))) {
 348                 drain_depth_limit = DEFAULT_DRAIN_DEPTH_LIMIT;
 349         }
 350
 351         if (!PE_parse_boot_argn("multiq_drain_band_limit", &drain_band_limit, sizeof(drain_band_limit))) {
 352                 drain_band_limit = DEFAULT_DRAIN_BAND_LIMIT;
 353         }
 354
 355         printf("multiq scheduler config: deep-drain %d, urgent first %d, depth limit %d, band limit %d, sanity check %d\n",
 356                deep_drain, drain_urgent_first, drain_depth_limit, drain_band_limit, multiq_sanity_check);
 357
 358         sched_group_zone = zinit(
 359                                  sizeof(struct sched_group),
 360                                  task_max * sizeof(struct sched_group),
 361                                  PAGE_SIZE,
 362                                  "sched groups");
 363
 364         zone_change(sched_group_zone, Z_NOENCRYPT, TRUE);
 365         zone_change(sched_group_zone, Z_NOCALLOUT, TRUE);
 366
 367         queue_init(&sched_groups);
 368
 369         lck_grp_attr_setdefault(&sched_groups_lock_grp_attr);
 370         lck_grp_init(&sched_groups_lock_grp, "sched_groups", &sched_groups_lock_grp_attr);
 371         lck_attr_setdefault(&sched_groups_lock_attr);
 372         lck_mtx_init(&sched_groups_lock, &sched_groups_lock_grp, &sched_groups_lock_attr);
 373
 374         sched_traditional_init();
 375 }
 376
 377 static void
 378 sched_multiq_processor_init(processor_t processor)
 379 {
 380         run_queue_init(&processor->runq);
 381 }
 382
 383 static void
 384 sched_multiq_pset_init(processor_set_t pset)
 385 {
 386         run_queue_init(&pset->pset_runq);
 387 }
 388
 389 static sched_mode_t
 390 sched_multiq_initial_thread_sched_mode(task_t parent_task)
 391 {
 392         if (parent_task == kernel_task)
 393                 return TH_MODE_FIXED;
 394         else
 395                 return TH_MODE_TIMESHARE;
 396 }
 397
 398 sched_group_t
 399 sched_group_create(void)
 400 {
 401         sched_group_t       sched_group;
 402
 403         if (!sched_groups_enabled)
 404                 return SCHED_GROUP_NULL;
 405
 406         sched_group = (sched_group_t)zalloc(sched_group_zone);
 407
 408         bzero(sched_group, sizeof(struct sched_group));
 409
 410         run_queue_init(&sched_group->runq);
 411
 412         for (int i = 0; i < NRQS; i++) {
 413                 sched_group->entries[i].runq = 0;
 414                 sched_group->entries[i].sched_pri = i;
 415         }
 416
 417         lck_mtx_lock(&sched_groups_lock);
 418         queue_enter(&sched_groups, sched_group, sched_group_t, sched_groups);
 419         num_sched_groups++;
 420         lck_mtx_unlock(&sched_groups_lock);
 421
 422         return (sched_group);
 423 }
 424
 425 void
 426 sched_group_destroy(sched_group_t sched_group)
 427 {
 428         if (!sched_groups_enabled) {
 429                 assert(sched_group == SCHED_GROUP_NULL);
 430                 return;
 431         }
 432
 433         assert(sched_group != SCHED_GROUP_NULL);
 434         assert(sched_group->runq.count == 0);
 435
 436         for (int i = 0; i < NRQS; i++) {
 437                 assert(sched_group->entries[i].runq == 0);
 438                 assert(sched_group->entries[i].sched_pri == i);
 439         }
 440
 441         lck_mtx_lock(&sched_groups_lock);
 442         queue_remove(&sched_groups, sched_group, sched_group_t, sched_groups);
 443         num_sched_groups--;
 444         lck_mtx_unlock(&sched_groups_lock);
 445
 446         zfree(sched_group_zone, sched_group);
 447 }
 448
 449 __attribute__((always_inline))
 450 static inline entry_queue_t
 451 multiq_main_entryq(processor_t processor)
 452 {
 453         return (entry_queue_t)&processor->processor_set->pset_runq;
 454 }
 455
 456 __attribute__((always_inline))
 457 static inline run_queue_t
 458 multiq_bound_runq(processor_t processor)
 459 {
 460         return &processor->runq;
 461 }
 462
 463 __attribute__((always_inline))
 464 static inline sched_entry_t
 465 group_entry_for_pri(sched_group_t group, integer_t pri)
 466 {
 467         return &group->entries[pri];
 468 }
 469
 470 __attribute__((always_inline))
 471 static inline sched_group_t
 472 group_for_entry(sched_entry_t entry)
 473 {
 474         sched_group_t group = (sched_group_t)(entry - entry->sched_pri);
 475         return group;
 476 }
 477
 478 /* Peek at the head of the runqueue */
 479 static sched_entry_t
 480 entry_queue_first_entry(entry_queue_t rq)
 481 {
 482         assert(rq->count != 0);
 483
 484         queue_t queue = rq->queues + rq->highq;
 485
 486         sched_entry_t entry = (sched_entry_t)queue_first(queue);
 487
 488         assert(entry->sched_pri == rq->highq);
 489
 490         return entry;
 491 }
 492
 493 #if defined(MULTIQ_SANITY_CHECK)
 494
 495 __attribute__((always_inline))
 496 static inline boolean_t
 497 queue_chain_linked(queue_chain_t* chain)
 498 {
 499         if (chain->next != NULL) {
 500                 assert(chain->prev != NULL);
 501                 return TRUE;
 502         } else {
 503                 assert(chain->prev == NULL);
 504                 return FALSE;
 505         }
 506 }
 507
 508 static thread_t
 509 group_first_thread(sched_group_t group)
 510 {
 511         group_runq_t rq = &group->runq;
 512
 513         assert(rq->count != 0);
 514
 515         queue_t queue = rq->queues + rq->highq;
 516
 517         thread_t thread = (thread_t)(void*)queue_first(queue);
 518
 519         assert(thread != THREAD_NULL);
 520
 521         assert(thread->sched_group == group);
 522
 523         /* TODO: May not be safe */
 524         assert(thread->sched_pri == rq->highq);
 525
 526         return thread;
 527 }
 528
 529 /* Asserts if entry is not in entry runq at pri */
 530 static void
 531 entry_queue_check_entry(entry_queue_t runq, sched_entry_t entry, int expected_pri)
 532 {
 533         queue_t q;
 534         sched_entry_t elem;
 535
 536         assert(queue_chain_linked(&entry->links));
 537         assert(entry->runq == MULTIQ_ERUNQ);
 538
 539         q = &runq->queues[expected_pri];
 540
 541         queue_iterate(q, elem, sched_entry_t, links) {
 542                 if (elem == entry)
 543                         return;
 544         }
 545
 546         panic("runq %p doesn't contain entry %p at pri %d", runq, entry, expected_pri);
 547 }
 548
 549 /* Asserts if thread is not in group at its priority */
 550 static void
 551 sched_group_check_thread(sched_group_t group, thread_t thread)
 552 {
 553         queue_t q;
 554         thread_t elem;
 555         int pri = thread->sched_pri;
 556
 557         assert(thread->runq != PROCESSOR_NULL);
 558
 559         q = &group->runq.queues[pri];
 560
 561         queue_iterate(q, elem, thread_t, links) {
 562                 if (elem == thread)
 563                         return;
 564         }
 565
 566         panic("group %p doesn't contain thread %p at pri %d", group, thread, pri);
 567 }
 568
 569 static void
 570 global_check_entry_queue(entry_queue_t main_entryq)
 571 {
 572         if (main_entryq->count == 0)
 573                 return;
 574
 575         sched_entry_t entry = entry_queue_first_entry(main_entryq);
 576
 577         assert(entry->runq == MULTIQ_ERUNQ);
 578
 579         sched_group_t group = group_for_entry(entry);
 580
 581         thread_t thread = group_first_thread(group);
 582
 583         __assert_only sched_entry_t thread_entry = group_entry_for_pri(thread->sched_group, thread->sched_pri);
 584
 585         assert(entry->sched_pri == group->runq.highq);
 586
 587         assert(entry == thread_entry);
 588         assert(thread->runq != PROCESSOR_NULL);
 589 }
 590
 591 static void
 592 group_check_run_queue(entry_queue_t main_entryq, sched_group_t group)
 593 {
 594         if (group->runq.count == 0)
 595                 return;
 596
 597         thread_t thread = group_first_thread(group);
 598
 599         assert(thread->runq != PROCESSOR_NULL);
 600
 601         sched_entry_t sched_entry = group_entry_for_pri(thread->sched_group, thread->sched_pri);
 602
 603         entry_queue_check_entry(main_entryq, sched_entry, thread->sched_pri);
 604
 605         assert(sched_entry->sched_pri == thread->sched_pri);
 606         assert(sched_entry->runq == MULTIQ_ERUNQ);
 607 }
 608
 609 #endif /* defined(MULTIQ_SANITY_CHECK) */
 610
 611 /*
 612  * The run queue must not be empty.
 613  */
 614 static sched_entry_t
 615 entry_queue_dequeue_entry(entry_queue_t rq)
 616 {
 617         sched_entry_t   sched_entry;
 618         queue_t         queue = rq->queues + rq->highq;
 619
 620         assert(rq->count > 0);
 621         assert(!queue_empty(queue));
 622
 623         sched_entry = (sched_entry_t)dequeue_head(queue);
 624
 625         SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
 626         rq->count--;
 627         if (SCHED(priority_is_urgent)(rq->highq)) {
 628                 rq->urgency--; assert(rq->urgency >= 0);
 629         }
 630         if (queue_empty(queue)) {
 631                 if (rq->highq != IDLEPRI)
 632                         clrbit(MAXPRI - rq->highq, rq->bitmap);
 633                 rq->highq = MAXPRI - ffsbit(rq->bitmap);
 634         }
 635
 636         sched_entry->runq = 0;
 637
 638         return (sched_entry);
 639 }
 640
 641 /*
 642  * The run queue must not be empty.
 643  */
 644 static boolean_t
 645 entry_queue_enqueue_entry(
 646                           entry_queue_t rq,
 647                           sched_entry_t entry,
 648                           integer_t     options)
 649 {
 650         int             sched_pri = entry->sched_pri;
 651         queue_t         queue = rq->queues + sched_pri;
 652         boolean_t       result = FALSE;
 653
 654         assert(entry->runq == 0);
 655
 656         if (queue_empty(queue)) {
 657                 enqueue_tail(queue, (queue_entry_t)entry);
 658
 659                 setbit(MAXPRI - sched_pri, rq->bitmap);
 660                 if (sched_pri > rq->highq) {
 661                         rq->highq = sched_pri;
 662                         result = TRUE;
 663                 }
 664         } else {
 665                 if (options & SCHED_TAILQ)
 666                         enqueue_tail(queue, (queue_entry_t)entry);
 667                 else
 668                         enqueue_head(queue, (queue_entry_t)entry);
 669         }
 670         if (SCHED(priority_is_urgent)(sched_pri))
 671                 rq->urgency++;
 672         SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
 673         rq->count++;
 674
 675         entry->runq = MULTIQ_ERUNQ;
 676
 677         return (result);
 678 }
 679
 680 /*
 681  * The entry must be in this runqueue.
 682  */
 683 static void
 684 entry_queue_remove_entry(
 685                          entry_queue_t  rq,
 686                          sched_entry_t  entry)
 687 {
 688         int sched_pri = entry->sched_pri;
 689
 690 #if defined(MULTIQ_SANITY_CHECK)
 691         if (multiq_sanity_check) {
 692                 entry_queue_check_entry(rq, entry, sched_pri);
 693         }
 694 #endif
 695
 696         remqueue((queue_entry_t)entry);
 697
 698         SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
 699         rq->count--;
 700         if (SCHED(priority_is_urgent)(sched_pri)) {
 701                 rq->urgency--; assert(rq->urgency >= 0);
 702         }
 703
 704         if (queue_empty(rq->queues + sched_pri)) {
 705                 /* update run queue status */
 706                 if (sched_pri != IDLEPRI)
 707                         clrbit(MAXPRI - sched_pri, rq->bitmap);
 708                 rq->highq = MAXPRI - ffsbit(rq->bitmap);
 709         }
 710
 711         entry->runq = 0;
 712 }
 713
 714 /*
 715  * The run queue must not be empty.
 716  *
 717  * sets queue_empty to TRUE if queue is now empty at thread_pri
 718  */
 719 static thread_t
 720 group_run_queue_dequeue_thread(
 721                          group_runq_t   rq,
 722                          integer_t     *thread_pri,
 723                          boolean_t     *queue_empty)
 724 {
 725         thread_t        thread;
 726         queue_t         queue = rq->queues + rq->highq;
 727
 728         assert(rq->count > 0);
 729         assert(!queue_empty(queue));
 730
 731         *thread_pri = rq->highq;
 732
 733         thread = (thread_t)(void*)dequeue_head(queue);
 734
 735         SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
 736         rq->count--;
 737         if (SCHED(priority_is_urgent)(rq->highq)) {
 738                 rq->urgency--; assert(rq->urgency >= 0);
 739         }
 740         if (queue_empty(queue)) {
 741                 if (rq->highq != IDLEPRI)
 742                         clrbit(MAXPRI - rq->highq, rq->bitmap);
 743                 rq->highq = MAXPRI - ffsbit(rq->bitmap);
 744                 *queue_empty = TRUE;
 745         } else {
 746                 *queue_empty = FALSE;
 747         }
 748
 749         return (thread);
 750 }
 751
 752 /*
 753  * The run queue must not be empty.
 754  * returns TRUE if queue was empty at thread_pri
 755  */
 756 static boolean_t
 757 group_run_queue_enqueue_thread(
 758                          group_runq_t   rq,
 759                          thread_t       thread,
 760                          integer_t      thread_pri,
 761                          integer_t      options)
 762 {
 763         queue_t         queue = rq->queues + thread_pri;
 764         boolean_t       result = FALSE;
 765
 766         assert(thread->runq == PROCESSOR_NULL);
 767
 768         if (queue_empty(queue)) {
 769                 enqueue_tail(queue, (queue_entry_t)thread);
 770
 771                 setbit(MAXPRI - thread_pri, rq->bitmap);
 772                 if (thread_pri > rq->highq) {
 773                         rq->highq = thread_pri;
 774                 }
 775                 result = TRUE;
 776         } else {
 777                 if (options & SCHED_TAILQ)
 778                         enqueue_tail(queue, (queue_entry_t)thread);
 779                 else
 780                         enqueue_head(queue, (queue_entry_t)thread);
 781         }
 782         if (SCHED(priority_is_urgent)(thread_pri))
 783                 rq->urgency++;
 784         SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
 785         rq->count++;
 786
 787         return (result);
 788 }
 789
 790 /*
 791  * The thread must be in this runqueue.
 792  * returns TRUE if queue is now empty at thread_pri
 793  */
 794 static boolean_t
 795 group_run_queue_remove_thread(
 796                         group_runq_t    rq,
 797                         thread_t        thread,
 798                         integer_t       thread_pri)
 799 {
 800         boolean_t       result = FALSE;
 801
 802         assert(thread->runq != PROCESSOR_NULL);
 803
 804         remqueue((queue_entry_t)thread);
 805
 806         SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
 807         rq->count--;
 808         if (SCHED(priority_is_urgent)(thread_pri)) {
 809                 rq->urgency--; assert(rq->urgency >= 0);
 810         }
 811
 812         if (queue_empty(rq->queues + thread_pri)) {
 813                 /* update run queue status */
 814                 if (thread_pri != IDLEPRI)
 815                         clrbit(MAXPRI - thread_pri, rq->bitmap);
 816                 rq->highq = MAXPRI - ffsbit(rq->bitmap);
 817                 result = TRUE;
 818         }
 819
 820         thread->runq = PROCESSOR_NULL;
 821
 822         return result;
 823 }
 824
 825 /*
 826  * A thread's sched pri may change out from under us because
 827  * we're clearing thread->runq here without the thread locked.
 828  * Do not rely on it to be the same as when we enqueued.
 829  */
 830 static thread_t
 831 sched_global_dequeue_thread(entry_queue_t main_entryq)
 832 {
 833         boolean_t pri_level_empty = FALSE;
 834         sched_entry_t entry;
 835         group_runq_t group_runq;
 836         thread_t thread;
 837         integer_t thread_pri;
 838         sched_group_t group;
 839
 840         assert(main_entryq->count > 0);
 841
 842         entry = entry_queue_dequeue_entry(main_entryq);
 843
 844         group = group_for_entry(entry);
 845         group_runq = &group->runq;
 846
 847         thread = group_run_queue_dequeue_thread(group_runq, &thread_pri, &pri_level_empty);
 848
 849         thread->runq = PROCESSOR_NULL;
 850
 851         if (!pri_level_empty) {
 852                 entry_queue_enqueue_entry(main_entryq, entry, SCHED_TAILQ);
 853         }
 854
 855         return thread;
 856 }
 857
 858 /* Dequeue a thread from the global runq without moving the entry */
 859 static thread_t
 860 sched_global_deep_drain_dequeue_thread(entry_queue_t main_entryq)
 861 {
 862         boolean_t pri_level_empty = FALSE;
 863         sched_entry_t entry;
 864         group_runq_t group_runq;
 865         thread_t thread;
 866         integer_t thread_pri;
 867         sched_group_t group;
 868
 869         assert(main_entryq->count > 0);
 870
 871         entry = entry_queue_first_entry(main_entryq);
 872
 873         group = group_for_entry(entry);
 874         group_runq = &group->runq;
 875
 876         thread = group_run_queue_dequeue_thread(group_runq, &thread_pri, &pri_level_empty);
 877
 878         thread->runq = PROCESSOR_NULL;
 879
 880         if (pri_level_empty) {
 881                 entry_queue_remove_entry(main_entryq, entry);
 882         }
 883
 884         return thread;
 885 }
 886
 887
 888 static thread_t
 889 sched_group_dequeue_thread(
 890                            entry_queue_t main_entryq,
 891                            sched_group_t group)
 892 {
 893         group_runq_t group_runq = &group->runq;
 894         boolean_t pri_level_empty = FALSE;
 895         thread_t thread;
 896         integer_t thread_pri;
 897
 898         thread = group_run_queue_dequeue_thread(group_runq, &thread_pri, &pri_level_empty);
 899
 900         thread->runq = PROCESSOR_NULL;
 901
 902         if (pri_level_empty) {
 903                 entry_queue_remove_entry(main_entryq, group_entry_for_pri(group, thread_pri));
 904         }
 905
 906         return thread;
 907 }
 908
 909 static void
 910 sched_group_remove_thread(
 911                           entry_queue_t main_entryq,
 912                           sched_group_t group,
 913                           thread_t thread)
 914 {
 915         integer_t thread_pri = thread->sched_pri;
 916         sched_entry_t sched_entry = group_entry_for_pri(group, thread_pri);
 917
 918 #if defined(MULTIQ_SANITY_CHECK)
 919         if (multiq_sanity_check) {
 920                 global_check_entry_queue(main_entryq);
 921                 group_check_run_queue(main_entryq, group);
 922
 923                 sched_group_check_thread(group, thread);
 924                 entry_queue_check_entry(main_entryq, sched_entry, thread_pri);
 925         }
 926 #endif
 927
 928         boolean_t pri_level_empty = group_run_queue_remove_thread(&group->runq, thread, thread_pri);
 929
 930         if (pri_level_empty) {
 931                 entry_queue_remove_entry(main_entryq, sched_entry);
 932         }
 933
 934 #if defined(MULTIQ_SANITY_CHECK)
 935         if (multiq_sanity_check) {
 936                 global_check_entry_queue(main_entryq);
 937                 group_check_run_queue(main_entryq, group);
 938         }
 939 #endif
 940 }
 941
 942 static void
 943 sched_group_enqueue_thread(
 944                            entry_queue_t        main_entryq,
 945                            sched_group_t        group,
 946                            thread_t             thread,
 947                            integer_t            options)
 948 {
 949 #if defined(MULTIQ_SANITY_CHECK)
 950         if (multiq_sanity_check) {
 951                 global_check_entry_queue(main_entryq);
 952                 group_check_run_queue(main_entryq, group);
 953         }
 954 #endif
 955
 956         int sched_pri = thread->sched_pri;
 957
 958         boolean_t pri_level_was_empty = group_run_queue_enqueue_thread(&group->runq, thread, sched_pri, options);
 959
 960         if (pri_level_was_empty) {
 961                 /*
 962                  * TODO: Need to figure out if passing options here is a good idea or not
 963                  * What effects would it have?
 964                  */
 965                 entry_queue_enqueue_entry(main_entryq, &group->entries[sched_pri], options);
 966         }
 967 }
 968
 969 /*
 970  *  Locate a thread to execute from the run queue and return it.
 971  *  Only choose a thread with greater or equal priority.
 972  *
 973  *  pset is locked, thread is not locked.
 974  *
 975  *  Returns THREAD_NULL if it cannot find a valid thread.
 976  *
 977  *  Note: we cannot rely on the value of thread->sched_pri in this path because
 978  *  we don't have the thread locked.
 979  *
 980  *  TODO: Remove tracepoints
 981  */
 982 static thread_t
 983 sched_multiq_choose_thread(
 984                            processor_t      processor,
 985                            int              priority,
 986                            ast_t            reason)
 987 {
 988         entry_queue_t   main_entryq = multiq_main_entryq(processor);
 989         run_queue_t     bound_runq  = multiq_bound_runq(processor);
 990
 991         boolean_t choose_bound_runq = FALSE;
 992
 993         if (bound_runq->highq  < priority &&
 994             main_entryq->highq < priority)
 995                 return THREAD_NULL;
 996
 997         if (bound_runq->count && main_entryq->count) {
 998                 if (bound_runq->highq >= main_entryq->highq) {
 999                         choose_bound_runq = TRUE;
1000                 } else {
1001                         /* Use main runq */
1002                 }
1003         } else if (bound_runq->count) {
1004                 choose_bound_runq = TRUE;
1005         } else if (main_entryq->count) {
1006                 /* Use main runq */
1007         } else {
1008                 return (THREAD_NULL);
1009         }
1010
1011         if (choose_bound_runq) {
1012                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1013                     MACHDBG_CODE(DBG_MACH_SCHED, MACH_MULTIQ_DEQUEUE) | DBG_FUNC_NONE,
1014                     MACH_MULTIQ_BOUND, main_entryq->highq, bound_runq->highq, 0, 0);
1015
1016                 return run_queue_dequeue(bound_runq, SCHED_HEADQ);
1017         }
1018
1019         sched_group_t group = current_thread()->sched_group;
1020
1021 #if defined(MULTIQ_SANITY_CHECK)
1022         if (multiq_sanity_check) {
1023                 global_check_entry_queue(main_entryq);
1024                 group_check_run_queue(main_entryq, group);
1025         }
1026 #endif
1027
1028         /*
1029          * Determine if we should look at the group or the global queue
1030          *
1031          * TODO:
1032          * Perhaps pass reason as a 'should look inside' argument to choose_thread
1033          * Should YIELD AST override drain limit?
1034          */
1035         if (group->runq.count != 0 && (reason & AST_PREEMPTION) == 0) {
1036                 boolean_t   drain_limit_hit = FALSE;
1037
1038                 if (main_entryq->highq > group->runq.highq) {
1039                         /*
1040                          * If there's something elsewhere above the depth limit,
1041                          * don't pick a thread below the limit.
1042                          */
1043                         if (main_entryq->highq > drain_depth_limit &&
1044                             group->runq.highq <= drain_depth_limit)
1045                                 drain_limit_hit = TRUE;
1046
1047                         /*
1048                          * Don't go more than X steps below the global highest
1049                          */
1050                         if ((main_entryq->highq - group->runq.highq) >= drain_band_limit)
1051                                 drain_limit_hit = TRUE;
1052
1053                         /* Don't favor the task when an urgent thread is present. */
1054                         if (drain_urgent_first && main_entryq->urgency > 0)
1055                                 drain_limit_hit = TRUE;
1056                 }
1057
1058                 if (!drain_limit_hit) {
1059                         /* Pull from local runq */
1060                         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1061                             MACHDBG_CODE(DBG_MACH_SCHED, MACH_MULTIQ_DEQUEUE) | DBG_FUNC_NONE,
1062                             MACH_MULTIQ_GROUP, main_entryq->highq, group->runq.highq, 0, 0);
1063
1064                         return sched_group_dequeue_thread(main_entryq, group);
1065                 }
1066         }
1067
1068         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1069             MACHDBG_CODE(DBG_MACH_SCHED, MACH_MULTIQ_DEQUEUE) | DBG_FUNC_NONE,
1070             MACH_MULTIQ_GLOBAL, main_entryq->highq, group->runq.highq, 0, 0);
1071
1072         /* Couldn't pull from local runq, pull from global runq instead */
1073         if (deep_drain) {
1074                 return sched_global_deep_drain_dequeue_thread(main_entryq);
1075         } else {
1076                 return sched_global_dequeue_thread(main_entryq);
1077         }
1078 }
1079
1080
1081 /*
1082  * Thread must be locked, and not already be on a run queue.
1083  * pset is locked.
1084  */
1085 static boolean_t
1086 sched_multiq_processor_enqueue(
1087                                processor_t      processor,
1088                                thread_t         thread,
1089                                integer_t        options)
1090 {
1091         boolean_t       result;
1092
1093         assert(processor == thread->chosen_processor);
1094
1095         if (thread->bound_processor != PROCESSOR_NULL) {
1096                 assert(thread->bound_processor == processor);
1097
1098                 result = run_queue_enqueue(multiq_bound_runq(processor), thread, options);
1099                 thread->runq = processor;
1100
1101                 return result;
1102         }
1103
1104         sched_group_enqueue_thread(multiq_main_entryq(processor),
1105                                    thread->sched_group,
1106                                    thread, options);
1107
1108         thread->runq = processor;
1109
1110         return (FALSE);
1111 }
1112
1113 /*
1114  * Called in the context of thread with thread and pset unlocked,
1115  * after updating thread priority but before propagating that priority
1116  * to the processor
1117  */
1118 void
1119 sched_multiq_quantum_expire(thread_t thread)
1120 {
1121         if (deep_drain) {
1122                 /*
1123                  * Move the entry at this priority to the end of the queue,
1124                  * to allow the next task a shot at running.
1125                  */
1126
1127                 processor_t processor = thread->last_processor;
1128                 processor_set_t pset = processor->processor_set;
1129                 entry_queue_t entryq = multiq_main_entryq(processor);
1130
1131                 pset_lock(pset);
1132
1133                 sched_entry_t entry = group_entry_for_pri(thread->sched_group, processor->current_pri);
1134
1135                 if (entry->runq == MULTIQ_ERUNQ) {
1136                         entry_queue_remove_entry(entryq, entry);
1137                         entry_queue_enqueue_entry(entryq, entry, SCHED_TAILQ);
1138                 }
1139
1140                 pset_unlock(pset);
1141         }
1142 }
1143
1144 static boolean_t
1145 sched_multiq_processor_queue_empty(processor_t processor)
1146 {
1147         return multiq_main_entryq(processor)->count == 0 &&
1148                multiq_bound_runq(processor)->count  == 0;
1149 }
1150
1151 static ast_t
1152 sched_multiq_processor_csw_check(processor_t processor)
1153 {
1154         boolean_t       has_higher;
1155         int             pri;
1156
1157         entry_queue_t main_entryq = multiq_main_entryq(processor);
1158         run_queue_t   bound_runq  = multiq_bound_runq(processor);
1159
1160         assert(processor->active_thread != NULL);
1161
1162         pri = MAX(main_entryq->highq, bound_runq->highq);
1163
1164         if (first_timeslice(processor)) {
1165                 has_higher = (pri > processor->current_pri);
1166         } else {
1167                 has_higher = (pri >= processor->current_pri);
1168         }
1169
1170         if (has_higher) {
1171                 if (main_entryq->urgency > 0)
1172                         return (AST_PREEMPT | AST_URGENT);
1173
1174                 if (bound_runq->urgency > 0)
1175                         return (AST_PREEMPT | AST_URGENT);
1176
1177                 if (processor->active_thread && thread_eager_preemption(processor->active_thread))
1178                         return (AST_PREEMPT | AST_URGENT);
1179
1180                 return AST_PREEMPT;
1181         }
1182
1183         return AST_NONE;
1184 }
1185
1186 static boolean_t
1187 sched_multiq_processor_queue_has_priority(
1188                                           processor_t   processor,
1189                                           int           priority,
1190                                           boolean_t     gte)
1191 {
1192         int qpri = MAX(multiq_main_entryq(processor)->highq, multiq_bound_runq(processor)->highq);
1193
1194         if (gte)
1195                 return qpri >= priority;
1196         else
1197                 return qpri > priority;
1198 }
1199
1200 static boolean_t
1201 sched_multiq_should_current_thread_rechoose_processor(processor_t processor)
1202 {
1203         return (processor->current_pri < BASEPRI_RTQUEUES && processor->processor_primary != processor);
1204 }
1205
1206 static int
1207 sched_multiq_runq_count(processor_t processor)
1208 {
1209         /*
1210          *  TODO: Decide whether to keep a count of runnable threads in the pset
1211          *  or just return something less than the true count.
1212          *
1213          *  This needs to be fast, so no iterating the whole runq.
1214          *
1215          *  Another possible decision is to remove this - with global runq
1216          *  it doesn't make much sense.
1217          */
1218         return multiq_main_entryq(processor)->count + multiq_bound_runq(processor)->count;
1219 }
1220
1221 static uint64_t
1222 sched_multiq_runq_stats_count_sum(processor_t processor)
1223 {
1224         /*
1225          * TODO: This one does need to go through all the runqueues, but it's only needed for
1226          * the sched stats tool
1227          */
1228
1229         uint64_t bound_sum = multiq_bound_runq(processor)->runq_stats.count_sum;
1230
1231         if (processor->cpu_id == processor->processor_set->cpu_set_low)
1232                 return bound_sum + multiq_main_entryq(processor)->runq_stats.count_sum;
1233         else
1234                 return bound_sum;
1235 }
1236
1237 static int
1238 sched_multiq_processor_bound_count(processor_t processor)
1239 {
1240         return multiq_bound_runq(processor)->count;
1241 }
1242
1243 static void
1244 sched_multiq_processor_queue_shutdown(processor_t processor)
1245 {
1246         processor_set_t pset = processor->processor_set;
1247         entry_queue_t   main_entryq = multiq_main_entryq(processor);
1248         thread_t        thread;
1249         queue_head_t    tqueue;
1250
1251         /* We only need to migrate threads if this is the last active processor in the pset */
1252         if (pset->online_processor_count > 0) {
1253                 pset_unlock(pset);
1254                 return;
1255         }
1256
1257         queue_init(&tqueue);
1258
1259         /* Note that we do not remove bound threads from the queues here */
1260
1261         while (main_entryq->count > 0) {
1262                 thread = sched_global_dequeue_thread(main_entryq);
1263                 enqueue_tail(&tqueue, (queue_entry_t)thread);
1264         }
1265
1266         pset_unlock(pset);
1267
1268         while ((thread = (thread_t)(void*)dequeue_head(&tqueue)) != THREAD_NULL) {
1269                 thread_lock(thread);
1270
1271                 thread_setrun(thread, SCHED_TAILQ);
1272
1273                 thread_unlock(thread);
1274         }
1275 }
1276
1277 /*
1278  * Thread is locked
1279  *
1280  * This is why we can never read sched_pri unless we have the thread locked.
1281  * Which we do in the enqueue and remove cases, but not the dequeue case.
1282  */
1283 static boolean_t
1284 sched_multiq_processor_queue_remove(
1285                                     processor_t processor,
1286                                     thread_t    thread)
1287 {
1288         boolean_t removed = FALSE;
1289
1290         processor_set_t pset = processor->processor_set;
1291
1292         pset_lock(pset);
1293
1294         if (thread->runq != PROCESSOR_NULL) {
1295                 /*
1296                  * Thread is on a run queue and we have a lock on
1297                  * that run queue.
1298                  */
1299
1300                 assert(thread->runq == processor);
1301
1302                 if (thread->bound_processor != PROCESSOR_NULL) {
1303                         assert(processor == thread->bound_processor);
1304                         run_queue_remove(multiq_bound_runq(processor), thread);
1305                         thread->runq = PROCESSOR_NULL;
1306                 } else {
1307                         sched_group_remove_thread(multiq_main_entryq(processor),
1308                                                   thread->sched_group,
1309                                                   thread);
1310                 }
1311
1312                 removed = TRUE;
1313         }
1314
1315         pset_unlock(pset);
1316
1317         return removed;
1318 }
1319
1320 /* pset is locked, returned unlocked */
1321 static thread_t
1322 sched_multiq_steal_thread(processor_set_t pset)
1323 {
1324         pset_unlock(pset);
1325         return (THREAD_NULL);
1326 }
1327
1328 /*
1329  * Scan the global queue for candidate groups, and scan those groups for
1330  * candidate threads.
1331  *
1332  * Returns TRUE if retry is needed.
1333  */
1334 static boolean_t
1335 group_scan(entry_queue_t runq) {
1336         int             count;
1337         queue_t         q;
1338         sched_group_t   group;
1339         sched_entry_t   entry;
1340
1341         if ((count = runq->count) > 0) {
1342                 q = runq->queues + runq->highq;
1343                 while (count > 0) {
1344                         queue_iterate(q, entry, sched_entry_t, links) {
1345                                 group = group_for_entry(entry);
1346                                 if (group->runq.count > 0) {
1347                                         if (runq_scan(&group->runq))
1348                                                 return (TRUE);
1349                                 }
1350                                 count--;
1351                         }
1352                         q--;
1353                 }
1354         }
1355
1356         return (FALSE);
1357 }
1358
1359 static void
1360 sched_multiq_thread_update_scan(void)
1361 {
1362         boolean_t               restart_needed = FALSE;
1363         processor_t             processor = processor_list;
1364         processor_set_t         pset;
1365         thread_t                thread;
1366         spl_t                   s;
1367
1368         /*
1369          *  We update the threads associated with each processor (bound and idle threads)
1370          *  and then update the threads in each pset runqueue.
1371          */
1372
1373         do {
1374                 do {
1375                         pset = processor->processor_set;
1376
1377                         s = splsched();
1378                         pset_lock(pset);
1379
1380                         restart_needed = runq_scan(multiq_bound_runq(processor));
1381
1382                         pset_unlock(pset);
1383                         splx(s);
1384
1385                         if (restart_needed)
1386                                 break;
1387
1388                         thread = processor->idle_thread;
1389                         if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) {
1390                                 if (thread_update_add_thread(thread) == FALSE) {
1391                                         restart_needed = TRUE;
1392                                         break;
1393                                 }
1394                         }
1395                 } while ((processor = processor->processor_list) != NULL);
1396
1397                 /* Ok, we now have a collection of candidates -- fix them. */
1398                 thread_update_process_threads();
1399
1400         } while (restart_needed);
1401
1402         pset = &pset0;
1403
1404         do {
1405                 do {
1406                         s = splsched();
1407                         pset_lock(pset);
1408
1409                         restart_needed = group_scan(&pset->pset_runq);
1410
1411                         pset_unlock(pset);
1412                         splx(s);
1413
1414                         if (restart_needed)
1415                                 break;
1416                 } while ((pset = pset->pset_list) != NULL);
1417
1418                 /* Ok, we now have a collection of candidates -- fix them. */
1419                 thread_update_process_threads();
1420
1421         } while (restart_needed);
1422 }
1423
1424