/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	processor.c: processor and processor_set manipulation routines.
 */
#include <mach/boolean.h>
#include <mach/policy.h>
#include <mach/processor.h>
#include <mach/processor_info.h>
#include <mach/vm_param.h>
#include <kern/cpu_number.h>
#include <kern/host.h>
#include <kern/ipc_host.h>
#include <kern/ipc_tt.h>
#include <kern/kalloc.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/sched.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/timer.h>

#include <kperf/kperf.h>

#include <ipc/ipc_port.h>

#include <security/mac_mach_internal.h>

#if defined(CONFIG_XNUPOST)
#include <tests/xnupost.h>
#endif /* CONFIG_XNUPOST */

#include <mach/mach_host_server.h>
#include <mach/processor_set_server.h>
struct processor_set pset0;
struct pset_node pset_node0;

static SIMPLE_LOCK_DECLARE(pset_node_lock, 0);
LCK_GRP_DECLARE(pset_lck_grp, "pset");

queue_head_t terminated_tasks;  /* To be used ONLY for stackshot. */
queue_head_t corpse_tasks;
int terminated_tasks_count;
queue_head_t threads;

LCK_GRP_DECLARE(task_lck_grp, "task");
LCK_ATTR_DECLARE(task_lck_attr, 0, 0);
LCK_MTX_DECLARE_ATTR(tasks_threads_lock, &task_lck_grp, &task_lck_attr);
LCK_MTX_DECLARE_ATTR(tasks_corpse_lock, &task_lck_grp, &task_lck_attr);

processor_t processor_list;
unsigned int processor_count;
static processor_t processor_list_tail;
SIMPLE_LOCK_DECLARE(processor_list_lock, 0);

uint32_t processor_avail_count;
uint32_t processor_avail_count_user;
uint32_t primary_processor_avail_count;
uint32_t primary_processor_avail_count_user;

struct processor PERCPU_DATA(processor);
processor_t processor_array[MAX_SCHED_CPUS] = { 0 };
processor_set_t pset_array[MAX_PSETS] = { 0 };
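/*
 * Expiration handlers for the per-processor running timers, indexed by
 * running timer ID.  The static_assert below checks that every timer ID
 * has a handler.
 */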
static timer_call_func_t running_timer_funcs[] = {
    [RUNNING_TIMER_QUANTUM] = thread_quantum_expire,
    [RUNNING_TIMER_KPERF] = kperf_timer_expire,
};
static_assert(sizeof(running_timer_funcs) / sizeof(running_timer_funcs[0])
    == RUNNING_TIMER_MAX, "missing running timer function");
#if defined(CONFIG_XNUPOST)
kern_return_t ipi_test(void);
extern void arm64_ipi_test(void);

kern_return_t
ipi_test(void)
{
#if __arm64__
    processor_t p;

    for (p = processor_list; p != NULL; p = p->processor_list) {
        thread_bind(p);
        thread_block(THREAD_CONTINUE_NULL);
        kprintf("Running IPI test on cpu %d\n", p->cpu_id);
        arm64_ipi_test();
    }

    /* unbind thread from specific cpu */
    thread_bind(PROCESSOR_NULL);
    thread_block(THREAD_CONTINUE_NULL);

    T_PASS("Done running IPI tests");
#else
    T_PASS("Unsupported platform. Not running IPI tests");
#endif /* __arm64__ */

    return KERN_SUCCESS;
}
#endif /* defined(CONFIG_XNUPOST) */
int sched_enable_smt = 1;
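/*
 *	processor_bootstrap:
 *
 *	Initialize the boot processor set and pset node, the global
 *	task/thread queues, and the master processor.
 */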
void
processor_bootstrap(void)
{
    pset_node0.psets = &pset0;
    pset_init(&pset0, &pset_node0);

    queue_init(&terminated_tasks);
    queue_init(&threads);
    queue_init(&corpse_tasks);

    processor_init(master_processor, master_cpu, &pset0);
}
/*
 *	Initialize the given processor for the cpu
 *	indicated by cpu_id, and assign to the
 *	specified processor set.
 */
void
processor_init(
    processor_t     processor,
    int             cpu_id,
    processor_set_t pset)
{
    assert(cpu_id < MAX_SCHED_CPUS);
    processor->cpu_id = cpu_id;

    if (processor != master_processor) {
        /* Scheduler state for master_processor initialized in sched_init() */
        SCHED(processor_init)(processor);
    }

    processor->state = PROCESSOR_OFF_LINE;
    processor->active_thread = processor->startup_thread = processor->idle_thread = THREAD_NULL;
    processor->processor_set = pset;
    processor_state_update_idle(processor);
    processor->starting_pri = MINPRI;
    processor->quantum_end = UINT64_MAX;
    processor->deadline = UINT64_MAX;
    processor->first_timeslice = FALSE;
    processor->processor_offlined = false;
    processor->processor_primary = processor; /* no SMT relationship known at this point */
    processor->processor_secondary = NULL;
    processor->is_SMT = false;
    processor->is_recommended = true;
    processor->processor_self = IP_NULL;
    processor->processor_list = NULL;
    processor->must_idle = false;
    processor->running_timers_active = false;

    for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
        timer_call_setup(&processor->running_timers[i],
            running_timer_funcs[i], processor);
        running_timer_clear(processor, i);
    }

    timer_init(&processor->idle_state);
    timer_init(&processor->system_state);
    timer_init(&processor->user_state);

    bit_set(pset->cpu_bitmask, cpu_id);
    bit_set(pset->recommended_bitmask, cpu_id);
    bit_set(pset->primary_map, cpu_id);
    bit_set(pset->cpu_state_map[PROCESSOR_OFF_LINE], cpu_id);
    if (pset->cpu_set_count++ == 0) {
        pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
    } else {
        pset->cpu_set_low = (cpu_id < pset->cpu_set_low)? cpu_id : pset->cpu_set_low;
        pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id : pset->cpu_set_hi;
    }

    simple_lock(&processor_list_lock, LCK_GRP_NULL);
    if (processor_list == NULL) {
        processor_list = processor;
    } else {
        processor_list_tail->processor_list = processor;
    }
    processor_list_tail = processor;
    processor_array[cpu_id] = processor;
    simple_unlock(&processor_list_lock);
}
bool system_is_SMT = false;
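/*
 *	processor_set_primary:
 *
 *	Record the SMT relationship between a (possibly secondary)
 *	processor and its primary, and drop the secondary from the
 *	pset's primary map.
 */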
void
processor_set_primary(
    processor_t processor,
    processor_t primary)
{
    assert(processor->processor_primary == primary || processor->processor_primary == processor);
    /* Re-adjust primary point for this (possibly) secondary processor */
    processor->processor_primary = primary;

    assert(primary->processor_secondary == NULL || primary->processor_secondary == processor);
    if (primary != processor) {
        /* Link primary to secondary, assumes a 2-way SMT model
         * We'll need to move to a queue if any future architecture
         * requires otherwise.
         */
        assert(processor->processor_secondary == NULL);
        primary->processor_secondary = processor;
        /* Mark both processors as SMT siblings */
        primary->is_SMT = TRUE;
        processor->is_SMT = TRUE;

        if (!system_is_SMT) {
            system_is_SMT = true;
        }

        processor_set_t pset = processor->processor_set;
        spl_t s = splsched();
        bit_clear(pset->primary_map, processor->cpu_id);
        splx(s);
    }
}
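/*
 *	processor_pset:
 *
 *	Return the processor set that the processor is currently
 *	assigned to.
 */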
processor_set_t
processor_pset(
    processor_t processor)
{
    return processor->processor_set;
}
#if CONFIG_SCHED_EDGE

cluster_type_t
pset_type_for_id(uint32_t cluster_id)
{
    return pset_array[cluster_id]->pset_type;
}

/*
 * Processor foreign threads
 *
 * With the Edge scheduler, each pset maintains a bitmap of processors running threads
 * which are foreign to the pset/cluster. A thread is defined as foreign for a cluster
 * if it's of a different type than its preferred cluster type (E/P). The bitmap should
 * be updated every time a new thread is assigned to run on a processor.
 *
 * This bitmap allows the Edge scheduler to quickly find CPUs running foreign threads.
 */
void
processor_state_update_running_foreign(processor_t processor, thread_t thread)
{
    cluster_type_t current_processor_type = pset_type_for_id(processor->processor_set->pset_cluster_id);
    cluster_type_t thread_type = pset_type_for_id(sched_edge_thread_preferred_cluster(thread));

    /* Update the bitmap for the pset only for unbounded non-RT threads. */
    if ((processor->current_pri < BASEPRI_RTQUEUES) && (thread->bound_processor == PROCESSOR_NULL) && (current_processor_type != thread_type)) {
        bit_set(processor->processor_set->cpu_running_foreign, processor->cpu_id);
    } else {
        bit_clear(processor->processor_set->cpu_running_foreign, processor->cpu_id);
    }
}
#else /* CONFIG_SCHED_EDGE */
void
processor_state_update_running_foreign(__unused processor_t processor, __unused thread_t thread)
{
}
#endif /* CONFIG_SCHED_EDGE */
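/*
 * processor_state_update_idle:
 *
 * Reset the tracked "currently running" state of a processor to the
 * idle defaults (idle priority, kernel SFI class, no thread group).
 */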
void
processor_state_update_idle(processor_t processor)
{
    processor->current_pri = IDLEPRI;
    processor->current_sfi_class = SFI_CLASS_KERNEL;
    processor->current_recommended_pset_type = PSET_SMP;
#if CONFIG_THREAD_GROUPS
    processor->current_thread_group = NULL;
#endif
    processor->current_perfctl_class = PERFCONTROL_CLASS_IDLE;
    processor->current_urgency = THREAD_URGENCY_NONE;
    processor->current_is_NO_SMT = false;
    processor->current_is_bound = false;
    os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], TH_BUCKET_SCHED_MAX, relaxed);
}
void
processor_state_update_from_thread(processor_t processor, thread_t thread)
{
    processor->current_pri = thread->sched_pri;
    processor->current_sfi_class = thread->sfi_class;
    processor->current_recommended_pset_type = recommended_pset_type(thread);
    processor_state_update_running_foreign(processor, thread);
    /* Since idle and bound threads are not tracked by the edge scheduler, ignore when those threads go on-core */
    sched_bucket_t bucket = ((thread->state & TH_IDLE) || (thread->bound_processor != PROCESSOR_NULL)) ? TH_BUCKET_SCHED_MAX : thread->th_sched_bucket;
    os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);

#if CONFIG_THREAD_GROUPS
    processor->current_thread_group = thread_group_get(thread);
#endif
    processor->current_perfctl_class = thread_get_perfcontrol_class(thread);
    processor->current_urgency = thread_get_urgency(thread, NULL, NULL);
    processor->current_is_NO_SMT = thread_no_smt(thread);
    processor->current_is_bound = thread->bound_processor != PROCESSOR_NULL;
}
void
processor_state_update_explicit(processor_t processor, int pri, sfi_class_id_t sfi_class,
    pset_cluster_type_t pset_type, perfcontrol_class_t perfctl_class, thread_urgency_t urgency, sched_bucket_t bucket)
{
    processor->current_pri = pri;
    processor->current_sfi_class = sfi_class;
    processor->current_recommended_pset_type = pset_type;
    processor->current_perfctl_class = perfctl_class;
    processor->current_urgency = urgency;
    os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
}
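/*
 *	pset_create:
 *
 *	Allocate and initialize a new processor set and link it into
 *	the given node's list of psets.
 */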
processor_set_t
pset_create(
    pset_node_t node)
{
    /* some schedulers do not support multiple psets */
    if (SCHED(multiple_psets_enabled) == FALSE) {
        return processor_pset(master_processor);
    }

    processor_set_t *prev, pset = zalloc_permanent_type(struct processor_set);

    if (pset != PROCESSOR_SET_NULL) {
        pset_init(pset, node);

        simple_lock(&pset_node_lock, LCK_GRP_NULL);

        prev = &node->psets;
        while (*prev != PROCESSOR_SET_NULL) {
            prev = &(*prev)->pset_list;
        }
        *prev = pset;

        simple_unlock(&pset_node_lock);
    }

    return pset;
}
/*
 * Find processor set with specified cluster_id.
 * Returns default_pset if not found.
 */
processor_set_t
pset_find(
    uint32_t cluster_id,
    processor_set_t default_pset)
{
    simple_lock(&pset_node_lock, LCK_GRP_NULL);
    pset_node_t node = &pset_node0;
    processor_set_t pset = NULL;

    do {
        pset = node->psets;
        while (pset != NULL) {
            if (pset->pset_cluster_id == cluster_id) {
                break;
            }
            pset = pset->pset_list;
        }
    } while (pset == NULL && (node = node->node_list) != NULL);
    simple_unlock(&pset_node_lock);
    if (pset == NULL) {
        pset = default_pset;
    }
    return pset;
}
/*
 *	Initialize the given processor_set structure.
 */
void
pset_init(
    processor_set_t pset,
    pset_node_t node)
{
    static uint32_t pset_count = 0;

    if (pset != &pset0) {
        /*
         * Scheduler runqueue initialization for non-boot psets.
         * This initialization for pset0 happens in sched_init().
         */
        SCHED(pset_init)(pset);
        SCHED(rt_init)(pset);
    }

    pset->online_processor_count = 0;
    pset->load_average = 0;
    bzero(&pset->pset_load_average, sizeof(pset->pset_load_average));
#if CONFIG_SCHED_EDGE
    bzero(&pset->pset_execution_time, sizeof(pset->pset_execution_time));
#endif /* CONFIG_SCHED_EDGE */
    pset->cpu_set_low = pset->cpu_set_hi = 0;
    pset->cpu_set_count = 0;
    pset->last_chosen = -1;
    pset->cpu_bitmask = 0;
    pset->recommended_bitmask = 0;
    pset->primary_map = 0;
    pset->realtime_map = 0;
    pset->cpu_running_foreign = 0;

    for (uint i = 0; i < PROCESSOR_STATE_LEN; i++) {
        pset->cpu_state_map[i] = 0;
    }
    pset->pending_AST_URGENT_cpu_mask = 0;
    pset->pending_AST_PREEMPT_cpu_mask = 0;
#if defined(CONFIG_SCHED_DEFERRED_AST)
    pset->pending_deferred_AST_cpu_mask = 0;
#endif
    pset->pending_spill_cpu_mask = 0;
    pset_lock_init(pset);
    pset->pset_self = IP_NULL;
    pset->pset_name_self = IP_NULL;
    pset->pset_list = PROCESSOR_SET_NULL;

    /*
     * The pset_cluster_type & pset_cluster_id for all psets
     * on the platform are initialized as part of the SCHED(init).
     * That works well for small cluster platforms; for large cluster
     * count systems, it might be cleaner to do all the setup
     * dynamically in SCHED(pset_init).
     *
     * <Edge Multi-cluster Support Needed>
     */
    pset->is_SMT = false;

    simple_lock(&pset_node_lock, LCK_GRP_NULL);
    pset->pset_id = pset_count++;
    bit_set(node->pset_map, pset->pset_id);
    simple_unlock(&pset_node_lock);

    pset_array[pset->pset_id] = pset;
}
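/*
 *	processor_info_count:
 *
 *	Return the expected *count value (info array length) for the
 *	given processor info flavor.
 */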
kern_return_t
processor_info_count(
    processor_flavor_t flavor,
    mach_msg_type_number_t *count)
{
    switch (flavor) {
    case PROCESSOR_BASIC_INFO:
        *count = PROCESSOR_BASIC_INFO_COUNT;
        break;

    case PROCESSOR_CPU_LOAD_INFO:
        *count = PROCESSOR_CPU_LOAD_INFO_COUNT;
        break;

    default:
        return cpu_info_count(flavor, count);
    }

    return KERN_SUCCESS;
}
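/*
 *	processor_info:
 *
 *	Return information about a processor for the requested flavor;
 *	basic info and CPU load ticks are handled here, anything else
 *	is passed through to the machine-dependent cpu_info().
 */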
kern_return_t
processor_info(
    processor_t processor,
    processor_flavor_t flavor,
    host_t *host,
    processor_info_t info,
    mach_msg_type_number_t *count)
{
    int cpu_id, state;
    kern_return_t result;

    if (processor == PROCESSOR_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    cpu_id = processor->cpu_id;

    switch (flavor) {
    case PROCESSOR_BASIC_INFO:
    {
        processor_basic_info_t basic_info;

        if (*count < PROCESSOR_BASIC_INFO_COUNT) {
            return KERN_FAILURE;
        }

        basic_info = (processor_basic_info_t) info;
        basic_info->cpu_type = slot_type(cpu_id);
        basic_info->cpu_subtype = slot_subtype(cpu_id);
        state = processor->state;
        if (state == PROCESSOR_OFF_LINE
#if defined(__x86_64__)
            || !processor->is_recommended
#endif
            ) {
            basic_info->running = FALSE;
        } else {
            basic_info->running = TRUE;
        }
        basic_info->slot_num = cpu_id;
        if (processor == master_processor) {
            basic_info->is_master = TRUE;
        } else {
            basic_info->is_master = FALSE;
        }

        *count = PROCESSOR_BASIC_INFO_COUNT;
        *host = &realhost;

        return KERN_SUCCESS;
    }

    case PROCESSOR_CPU_LOAD_INFO:
    {
        processor_cpu_load_info_t cpu_load_info;
        timer_t idle_state;
        uint64_t idle_time_snapshot1, idle_time_snapshot2;
        uint64_t idle_time_tstamp1, idle_time_tstamp2;

        /*
         * We capture the accumulated idle time twice over
         * the course of this function, as well as the timestamps
         * when each was last updated. Since these are
         * all done using non-atomic racy mechanisms, the
         * most we can infer is whether values are stable.
         * timer_grab() is the only function that can be
         * used reliably on another processor's per-processor
         * data.
         */

        if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT) {
            return KERN_FAILURE;
        }

        cpu_load_info = (processor_cpu_load_info_t) info;
        if (precise_user_kernel_time) {
            cpu_load_info->cpu_ticks[CPU_STATE_USER] =
                (uint32_t)(timer_grab(&processor->user_state) / hz_tick_interval);
            cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] =
                (uint32_t)(timer_grab(&processor->system_state) / hz_tick_interval);
        } else {
            uint64_t tval = timer_grab(&processor->user_state) +
                timer_grab(&processor->system_state);

            cpu_load_info->cpu_ticks[CPU_STATE_USER] = (uint32_t)(tval / hz_tick_interval);
            cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] = 0;
        }

        idle_state = &processor->idle_state;
        idle_time_snapshot1 = timer_grab(idle_state);
        idle_time_tstamp1 = idle_state->tstamp;

        /*
         * Idle processors are not continually updating their
         * per-processor idle timer, so it may be extremely
         * out of date, resulting in an over-representation
         * of non-idle time between two measurement
         * intervals by e.g. top(1). If we are non-idle, or
         * have evidence that the timer is being updated
         * concurrently, we consider its value up-to-date.
         */
        if (processor->current_state != idle_state) {
            cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
                (uint32_t)(idle_time_snapshot1 / hz_tick_interval);
        } else if ((idle_time_snapshot1 != (idle_time_snapshot2 = timer_grab(idle_state))) ||
            (idle_time_tstamp1 != (idle_time_tstamp2 = idle_state->tstamp))) {
            /* Idle timer is being updated concurrently, second stamp is good enough */
            cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
                (uint32_t)(idle_time_snapshot2 / hz_tick_interval);
        } else {
            /*
             * Idle timer may be very stale. Fortunately we have established
             * that idle_time_snapshot1 and idle_time_tstamp1 are unchanging
             */
            idle_time_snapshot1 += mach_absolute_time() - idle_time_tstamp1;

            cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
                (uint32_t)(idle_time_snapshot1 / hz_tick_interval);
        }

        cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0;

        *count = PROCESSOR_CPU_LOAD_INFO_COUNT;
        *host = &realhost;

        return KERN_SUCCESS;
    }

    default:
        result = cpu_info(flavor, cpu_id, info, count);
        if (result == KERN_SUCCESS) {
            *host = &realhost;
        }

        return result;
    }
}
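/*
 *	processor_start:
 *
 *	Bring a processor online: create its idle and startup threads
 *	if needed, then ask the platform layer to start the cpu.
 */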
kern_return_t
processor_start(
    processor_t processor)
{
    processor_set_t pset;
    thread_t thread;
    kern_return_t result;
    spl_t s;

    if (processor == PROCESSOR_NULL || processor->processor_set == PROCESSOR_SET_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    if (processor == master_processor) {
        processor_t prev;

        prev = thread_bind(processor);
        thread_block(THREAD_CONTINUE_NULL);

        result = cpu_start(processor->cpu_id);

        thread_bind(prev);

        return result;
    }

    bool scheduler_disable = false;

    if ((processor->processor_primary != processor) && (sched_enable_smt == 0)) {
        if (cpu_can_exit(processor->cpu_id)) {
            return KERN_SUCCESS;
        }
        /*
         * This secondary SMT processor must start in order to service interrupts,
         * so instead it will be disabled at the scheduler level.
         */
        scheduler_disable = true;
    }

    ml_cpu_begin_state_transition(processor->cpu_id);

    pset = processor->processor_set;
    if (processor->state != PROCESSOR_OFF_LINE) {
        ml_cpu_end_state_transition(processor->cpu_id);

        return KERN_FAILURE;
    }

    pset_update_processor_state(pset, processor, PROCESSOR_START);

    /*
     * Create the idle processor thread.
     */
    if (processor->idle_thread == THREAD_NULL) {
        result = idle_thread_create(processor);
        if (result != KERN_SUCCESS) {
            pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
            ml_cpu_end_state_transition(processor->cpu_id);

            return result;
        }
    }

    /*
     * If there is no active thread, the processor
     * has never been started. Create a dedicated
     * start up thread.
     */
    if (processor->active_thread == THREAD_NULL &&
        processor->startup_thread == THREAD_NULL) {
        result = kernel_thread_create(processor_start_thread, NULL, MAXPRI_KERNEL, &thread);
        if (result != KERN_SUCCESS) {
            pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
            ml_cpu_end_state_transition(processor->cpu_id);

            return result;
        }

        s = splsched();
        thread_lock(thread);
        thread->bound_processor = processor;
        processor->startup_thread = thread;
        thread->state = TH_RUN;
        thread->last_made_runnable_time = mach_absolute_time();
        thread_unlock(thread);
        splx(s);

        thread_deallocate(thread);
    }

    if (processor->processor_self == IP_NULL) {
        ipc_processor_init(processor);
    }

    ml_broadcast_cpu_event(CPU_BOOT_REQUESTED, processor->cpu_id);
    result = cpu_start(processor->cpu_id);
    if (result != KERN_SUCCESS) {
        pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
        ml_cpu_end_state_transition(processor->cpu_id);

        return result;
    }
    if (scheduler_disable) {
        assert(processor->processor_primary != processor);
        sched_processor_enable(processor, FALSE);
    }

    ipc_processor_enable(processor);
    ml_cpu_end_state_transition(processor->cpu_id);
    ml_broadcast_cpu_event(CPU_ACTIVE, processor->cpu_id);

    return KERN_SUCCESS;
}
kern_return_t
processor_exit(
    processor_t processor)
{
    if (processor == PROCESSOR_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    return processor_shutdown(processor);
}
kern_return_t
processor_start_from_user(
    processor_t processor)
{
    kern_return_t ret;

    if (processor == PROCESSOR_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    if (!cpu_can_exit(processor->cpu_id)) {
        ret = sched_processor_enable(processor, TRUE);
    } else {
        ret = processor_start(processor);
    }

    return ret;
}
kern_return_t
processor_exit_from_user(
    processor_t processor)
{
    kern_return_t ret;

    if (processor == PROCESSOR_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    if (!cpu_can_exit(processor->cpu_id)) {
        ret = sched_processor_enable(processor, FALSE);
    } else {
        ret = processor_shutdown(processor);
    }

    return ret;
}
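/*
 *	enable_smt_processors:
 *
 *	Start or shut down all secondary SMT processors, then verify the
 *	resulting logical cpu count against host_info().
 */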
kern_return_t
enable_smt_processors(bool enable)
{
    if (machine_info.logical_cpu_max == machine_info.physical_cpu_max) {
        /* Not an SMT system */
        return KERN_INVALID_ARGUMENT;
    }

    int ncpus = machine_info.logical_cpu_max;

    for (int i = 1; i < ncpus; i++) {
        processor_t processor = processor_array[i];

        if (processor->processor_primary != processor) {
            if (enable) {
                processor_start_from_user(processor);
            } else { /* Disable */
                processor_exit_from_user(processor);
            }
        }
    }

    host_basic_info_data_t hinfo;
    mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
    kern_return_t kret = host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
    if (kret != KERN_SUCCESS) {
        return KERN_FAILURE;
    }

    if (enable && (hinfo.logical_cpu != hinfo.logical_cpu_max)) {
        return KERN_FAILURE;
    }

    if (!enable && (hinfo.logical_cpu != hinfo.physical_cpu)) {
        return KERN_FAILURE;
    }

    return KERN_SUCCESS;
}
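/*
 *	processor_control:
 *
 *	Pass a machine-dependent control command through to cpu_control()
 *	for the given processor.
 */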
kern_return_t
processor_control(
    processor_t processor,
    processor_info_t info,
    mach_msg_type_number_t count)
{
    if (processor == PROCESSOR_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    return cpu_control(processor->cpu_id, info, count);
}
kern_return_t
processor_set_create(
    __unused host_t host,
    __unused processor_set_t *new_set,
    __unused processor_set_t *new_name)
{
    return KERN_FAILURE;
}

kern_return_t
processor_set_destroy(
    __unused processor_set_t pset)
{
    return KERN_FAILURE;
}
kern_return_t
processor_get_assignment(
    processor_t processor,
    processor_set_t *pset)
{
    int state;

    if (processor == PROCESSOR_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    state = processor->state;
    if (state == PROCESSOR_SHUTDOWN || state == PROCESSOR_OFF_LINE) {
        return KERN_FAILURE;
    }

    *pset = &pset0;

    return KERN_SUCCESS;
}
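/*
 *	processor_set_info:
 *
 *	Return information about the default processor set for the
 *	requested flavor (basic info, default scheduling policy bases
 *	and limits, and enabled policies).
 */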
kern_return_t
processor_set_info(
    processor_set_t pset,
    int flavor,
    host_t *host,
    processor_set_info_t info,
    mach_msg_type_number_t *count)
{
    if (pset == PROCESSOR_SET_NULL) {
        return KERN_INVALID_ARGUMENT;
    }

    if (flavor == PROCESSOR_SET_BASIC_INFO) {
        processor_set_basic_info_t basic_info;

        if (*count < PROCESSOR_SET_BASIC_INFO_COUNT) {
            return KERN_FAILURE;
        }

        basic_info = (processor_set_basic_info_t) info;
#if defined(__x86_64__)
        basic_info->processor_count = processor_avail_count_user;
#else
        basic_info->processor_count = processor_avail_count;
#endif
        basic_info->default_policy = POLICY_TIMESHARE;

        *count = PROCESSOR_SET_BASIC_INFO_COUNT;
        *host = &realhost;
        return KERN_SUCCESS;
    } else if (flavor == PROCESSOR_SET_TIMESHARE_DEFAULT) {
        policy_timeshare_base_t ts_base;

        if (*count < POLICY_TIMESHARE_BASE_COUNT) {
            return KERN_FAILURE;
        }

        ts_base = (policy_timeshare_base_t) info;
        ts_base->base_priority = BASEPRI_DEFAULT;

        *count = POLICY_TIMESHARE_BASE_COUNT;
        *host = &realhost;
        return KERN_SUCCESS;
    } else if (flavor == PROCESSOR_SET_FIFO_DEFAULT) {
        policy_fifo_base_t fifo_base;

        if (*count < POLICY_FIFO_BASE_COUNT) {
            return KERN_FAILURE;
        }

        fifo_base = (policy_fifo_base_t) info;
        fifo_base->base_priority = BASEPRI_DEFAULT;

        *count = POLICY_FIFO_BASE_COUNT;
        *host = &realhost;
        return KERN_SUCCESS;
    } else if (flavor == PROCESSOR_SET_RR_DEFAULT) {
        policy_rr_base_t rr_base;

        if (*count < POLICY_RR_BASE_COUNT) {
            return KERN_FAILURE;
        }

        rr_base = (policy_rr_base_t) info;
        rr_base->base_priority = BASEPRI_DEFAULT;
        rr_base->quantum = 1;

        *count = POLICY_RR_BASE_COUNT;
        *host = &realhost;
        return KERN_SUCCESS;
    } else if (flavor == PROCESSOR_SET_TIMESHARE_LIMITS) {
        policy_timeshare_limit_t ts_limit;

        if (*count < POLICY_TIMESHARE_LIMIT_COUNT) {
            return KERN_FAILURE;
        }

        ts_limit = (policy_timeshare_limit_t) info;
        ts_limit->max_priority = MAXPRI_KERNEL;

        *count = POLICY_TIMESHARE_LIMIT_COUNT;
        *host = &realhost;
        return KERN_SUCCESS;
    } else if (flavor == PROCESSOR_SET_FIFO_LIMITS) {
        policy_fifo_limit_t fifo_limit;

        if (*count < POLICY_FIFO_LIMIT_COUNT) {
            return KERN_FAILURE;
        }

        fifo_limit = (policy_fifo_limit_t) info;
        fifo_limit->max_priority = MAXPRI_KERNEL;

        *count = POLICY_FIFO_LIMIT_COUNT;
        *host = &realhost;
        return KERN_SUCCESS;
    } else if (flavor == PROCESSOR_SET_RR_LIMITS) {
        policy_rr_limit_t rr_limit;

        if (*count < POLICY_RR_LIMIT_COUNT) {
            return KERN_FAILURE;
        }

        rr_limit = (policy_rr_limit_t) info;
        rr_limit->max_priority = MAXPRI_KERNEL;

        *count = POLICY_RR_LIMIT_COUNT;
        *host = &realhost;
        return KERN_SUCCESS;
    } else if (flavor == PROCESSOR_SET_ENABLED_POLICIES) {
        int *enabled;

        if (*count < (sizeof(*enabled) / sizeof(int))) {
            return KERN_FAILURE;
        }

        enabled = (int *) info;
        *enabled = POLICY_TIMESHARE | POLICY_RR | POLICY_FIFO;

        *count = sizeof(*enabled) / sizeof(int);
        *host = &realhost;
        return KERN_SUCCESS;
    }

    return KERN_INVALID_ARGUMENT;
}
/*
 *	processor_set_statistics
 *
 *	Returns scheduling statistics for a processor set.
 */
kern_return_t
processor_set_statistics(
    processor_set_t pset,
    int flavor,
    processor_set_info_t info,
    mach_msg_type_number_t *count)
{
    if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
        return KERN_INVALID_PROCESSOR_SET;
    }

    if (flavor == PROCESSOR_SET_LOAD_INFO) {
        processor_set_load_info_t load_info;

        if (*count < PROCESSOR_SET_LOAD_INFO_COUNT) {
            return KERN_FAILURE;
        }

        load_info = (processor_set_load_info_t) info;

        load_info->mach_factor = sched_mach_factor;
        load_info->load_average = sched_load_average;

        load_info->task_count = tasks_count;
        load_info->thread_count = threads_count;

        *count = PROCESSOR_SET_LOAD_INFO_COUNT;
        return KERN_SUCCESS;
    }

    return KERN_INVALID_ARGUMENT;
}
/*
 *	processor_set_max_priority:
 *
 *	Specify max priority permitted on processor set. This affects
 *	newly created and assigned threads. Optionally change existing
 *	ones.
 */
kern_return_t
processor_set_max_priority(
    __unused processor_set_t pset,
    __unused int max_priority,
    __unused boolean_t change_threads)
{
    return KERN_INVALID_ARGUMENT;
}

/*
 *	processor_set_policy_enable:
 *
 *	Allow indicated policy on processor set.
 */
kern_return_t
processor_set_policy_enable(
    __unused processor_set_t pset,
    __unused int policy)
{
    return KERN_INVALID_ARGUMENT;
}

/*
 *	processor_set_policy_disable:
 *
 *	Forbid indicated policy on processor set. Time sharing cannot
 *	be forbidden.
 */
kern_return_t
processor_set_policy_disable(
    __unused processor_set_t pset,
    __unused int policy,
    __unused boolean_t change_threads)
{
    return KERN_INVALID_ARGUMENT;
}
/*
 *	processor_set_things:
 *
 *	Common internals for processor_set_{threads,tasks}
 */
static kern_return_t
processor_set_things(
    processor_set_t pset,
    void **thing_list,
    mach_msg_type_number_t *count,
    int type)
{
    unsigned int i;
    task_t task;
    thread_t thread;

    task_t *task_list;
    unsigned int actual_tasks;
    vm_size_t task_size, task_size_needed;

    thread_t *thread_list;
    unsigned int actual_threads;
    vm_size_t thread_size, thread_size_needed;

    void *addr, *newaddr;
    vm_size_t size, size_needed;

    if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
        return KERN_INVALID_ARGUMENT;
    }

    task_size = 0;
    task_size_needed = 0;
    task_list = NULL;
    actual_tasks = 0;

    thread_size = 0;
    thread_size_needed = 0;
    thread_list = NULL;
    actual_threads = 0;

    for (;;) {
        lck_mtx_lock(&tasks_threads_lock);

        /* do we have the memory we need? */
        if (type == PSET_THING_THREAD) {
            thread_size_needed = threads_count * sizeof(void *);
        }
        task_size_needed = tasks_count * sizeof(void *);

        if (task_size_needed <= task_size &&
            thread_size_needed <= thread_size) {
            break;
        }

        /* unlock and allocate more memory */
        lck_mtx_unlock(&tasks_threads_lock);

        /* grow task array */
        if (task_size_needed > task_size) {
            if (task_size != 0) {
                kfree(task_list, task_size);
            }

            assert(task_size_needed > 0);
            task_size = task_size_needed;

            task_list = (task_t *)kalloc(task_size);
            if (task_list == NULL) {
                if (thread_size != 0) {
                    kfree(thread_list, thread_size);
                }
                return KERN_RESOURCE_SHORTAGE;
            }
        }

        /* grow thread array */
        if (thread_size_needed > thread_size) {
            if (thread_size != 0) {
                kfree(thread_list, thread_size);
            }

            assert(thread_size_needed > 0);
            thread_size = thread_size_needed;

            thread_list = (thread_t *)kalloc(thread_size);
            if (thread_list == 0) {
                if (task_size != 0) {
                    kfree(task_list, task_size);
                }
                return KERN_RESOURCE_SHORTAGE;
            }
        }
    }

    /* OK, have memory and the list locked */

    /* If we need it, get the thread list */
    if (type == PSET_THING_THREAD) {
        for (thread = (thread_t)queue_first(&threads);
            !queue_end(&threads, (queue_entry_t)thread);
            thread = (thread_t)queue_next(&thread->threads)) {
#if defined(SECURE_KERNEL)
            if (thread->task != kernel_task) {
#endif
            thread_reference_internal(thread);
            thread_list[actual_threads++] = thread;
#if defined(SECURE_KERNEL)
            }
#endif
        }
    }

    /* get a list of the tasks */
    for (task = (task_t)queue_first(&tasks);
        !queue_end(&tasks, (queue_entry_t)task);
        task = (task_t)queue_next(&task->tasks)) {
#if defined(SECURE_KERNEL)
        if (task != kernel_task) {
#endif
        task_reference_internal(task);
        task_list[actual_tasks++] = task;
#if defined(SECURE_KERNEL)
        }
#endif
    }

    lck_mtx_unlock(&tasks_threads_lock);

#if CONFIG_MACF
    unsigned int j, used;

    /* for each task, make sure we are allowed to examine it */
    for (i = used = 0; i < actual_tasks; i++) {
        if (mac_task_check_expose_task(task_list[i])) {
            task_deallocate(task_list[i]);
            continue;
        }
        task_list[used++] = task_list[i];
    }
    actual_tasks = used;
    task_size_needed = actual_tasks * sizeof(void *);

    if (type == PSET_THING_THREAD) {
        /* for each thread (if any), make sure its task is in the allowed list */
        for (i = used = 0; i < actual_threads; i++) {
            boolean_t found_task = FALSE;

            task = thread_list[i]->task;
            for (j = 0; j < actual_tasks; j++) {
                if (task_list[j] == task) {
                    found_task = TRUE;
                    break;
                }
            }
            if (found_task) {
                thread_list[used++] = thread_list[i];
            } else {
                thread_deallocate(thread_list[i]);
            }
        }
        actual_threads = used;
        thread_size_needed = actual_threads * sizeof(void *);

        /* done with the task list */
        for (i = 0; i < actual_tasks; i++) {
            task_deallocate(task_list[i]);
        }
        kfree(task_list, task_size);
        task_size = 0;
        actual_tasks = 0;
        task_list = NULL;
    }
#endif /* CONFIG_MACF */

    if (type == PSET_THING_THREAD) {
        if (actual_threads == 0) {
            /* no threads available to return */
            assert(task_size == 0);
            if (thread_size != 0) {
                kfree(thread_list, thread_size);
            }
            *thing_list = NULL;
            *count = 0;

            return KERN_SUCCESS;
        }
        size_needed = actual_threads * sizeof(void *);
        size = thread_size;
        addr = thread_list;
    } else {
        if (actual_tasks == 0) {
            /* no tasks available to return */
            assert(thread_size == 0);
            if (task_size != 0) {
                kfree(task_list, task_size);
            }
            *thing_list = NULL;
            *count = 0;

            return KERN_SUCCESS;
        }
        size_needed = actual_tasks * sizeof(void *);
        size = task_size;
        addr = task_list;
    }

    /* if we allocated too much, must copy */
    if (size_needed < size) {
        newaddr = kalloc(size_needed);
        if (newaddr == 0) {
            for (i = 0; i < actual_tasks; i++) {
                if (type == PSET_THING_THREAD) {
                    thread_deallocate(thread_list[i]);
                } else {
                    task_deallocate(task_list[i]);
                }
            }
            kfree(addr, size);

            return KERN_RESOURCE_SHORTAGE;
        }

        bcopy((void *) addr, (void *) newaddr, size_needed);
        kfree(addr, size);

        addr = newaddr;
        size = size_needed;
    }

    *thing_list = (void **)addr;
    *count = (unsigned int)size / sizeof(void *);

    return KERN_SUCCESS;
}
/*
 *	processor_set_tasks:
 *
 *	List all tasks in the processor set.
 */
static kern_return_t
processor_set_tasks_internal(
    processor_set_t pset,
    task_array_t *task_list,
    mach_msg_type_number_t *count,
    mach_task_flavor_t flavor)
{
    kern_return_t ret;
    mach_msg_type_number_t i;

    ret = processor_set_things(pset, (void **)task_list, count, PSET_THING_TASK);
    if (ret != KERN_SUCCESS) {
        return ret;
    }

    /* do the conversion that Mig should handle */
    switch (flavor) {
    case TASK_FLAVOR_CONTROL:
        for (i = 0; i < *count; i++) {
            (*task_list)[i] = (task_t)convert_task_to_port((*task_list)[i]);
        }
        break;
    case TASK_FLAVOR_READ:
        for (i = 0; i < *count; i++) {
            (*task_list)[i] = (task_t)convert_task_read_to_port((*task_list)[i]);
        }
        break;
    case TASK_FLAVOR_INSPECT:
        for (i = 0; i < *count; i++) {
            (*task_list)[i] = (task_t)convert_task_inspect_to_port((*task_list)[i]);
        }
        break;
    case TASK_FLAVOR_NAME:
        for (i = 0; i < *count; i++) {
            (*task_list)[i] = (task_t)convert_task_name_to_port((*task_list)[i]);
        }
        break;
    default:
        return KERN_INVALID_ARGUMENT;
    }

    return KERN_SUCCESS;
}
kern_return_t
processor_set_tasks(
    processor_set_t pset,
    task_array_t *task_list,
    mach_msg_type_number_t *count)
{
    return processor_set_tasks_internal(pset, task_list, count, TASK_FLAVOR_CONTROL);
}
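/*
 * Rough user-space sketch (illustrative only) of how a privileged caller
 * typically reaches processor_set_tasks() via MIG, assuming it already
 * holds the host privileged port (host_priv): obtain the default pset
 * name, convert it to a control port, then request the task list.
 *
 *	processor_set_name_t name;
 *	processor_set_t pset;
 *	task_array_t tasks;
 *	mach_msg_type_number_t task_count;
 *
 *	processor_set_default(mach_host_self(), &name);
 *	host_processor_set_priv(host_priv, name, &pset);
 *	processor_set_tasks(pset, &tasks, &task_count);
 */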
/*
 *	processor_set_tasks_with_flavor:
 *
 *	Based on flavor, return task/inspect/read port to all tasks in the processor set.
 */
kern_return_t
processor_set_tasks_with_flavor(
    processor_set_t pset,
    mach_task_flavor_t flavor,
    task_array_t *task_list,
    mach_msg_type_number_t *count)
{
    switch (flavor) {
    case TASK_FLAVOR_CONTROL:
    case TASK_FLAVOR_READ:
    case TASK_FLAVOR_INSPECT:
    case TASK_FLAVOR_NAME:
        return processor_set_tasks_internal(pset, task_list, count, flavor);
    default:
        return KERN_INVALID_ARGUMENT;
    }
}
/*
 *	processor_set_threads:
 *
 *	List all threads in the processor set.
 */
#if defined(SECURE_KERNEL)
kern_return_t
processor_set_threads(
    __unused processor_set_t pset,
    __unused thread_array_t *thread_list,
    __unused mach_msg_type_number_t *count)
{
    return KERN_FAILURE;
}
#elif !defined(XNU_TARGET_OS_OSX)
kern_return_t
processor_set_threads(
    __unused processor_set_t pset,
    __unused thread_array_t *thread_list,
    __unused mach_msg_type_number_t *count)
{
    return KERN_NOT_SUPPORTED;
}
#else
kern_return_t
processor_set_threads(
    processor_set_t pset,
    thread_array_t *thread_list,
    mach_msg_type_number_t *count)
{
    kern_return_t ret;
    mach_msg_type_number_t i;

    ret = processor_set_things(pset, (void **)thread_list, count, PSET_THING_THREAD);
    if (ret != KERN_SUCCESS) {
        return ret;
    }

    /* do the conversion that Mig should handle */
    for (i = 0; i < *count; i++) {
        (*thread_list)[i] = (thread_t)convert_thread_to_port((*thread_list)[i]);
    }
    return KERN_SUCCESS;
}
#endif
/*
 *	processor_set_policy_control
 *
 *	Controls the scheduling attributes governing the processor set.
 *	Allows control of enabled policies, and per-policy base and limit
 *	priorities.
 */
kern_return_t
processor_set_policy_control(
    __unused processor_set_t pset,
    __unused int flavor,
    __unused processor_set_info_t policy_info,
    __unused mach_msg_type_number_t count,
    __unused boolean_t change)
{
    return KERN_INVALID_ARGUMENT;
}
#undef pset_deallocate
void pset_deallocate(processor_set_t pset);
void
pset_deallocate(
    __unused processor_set_t pset)
{
    return;
}

#undef pset_reference
void pset_reference(processor_set_t pset);
void
pset_reference(
    __unused processor_set_t pset)
{
    return;
}
#if CONFIG_THREAD_GROUPS
pset_cluster_type_t
thread_group_pset_recommendation(__unused struct thread_group *tg, __unused cluster_type_t recommendation)
{
#if __AMP__
    switch (recommendation) {
    case CLUSTER_TYPE_SMP:
    default:
        /*
         * In case of SMP recommendations, check if the thread
         * group has special flags which restrict it to the E
         * cluster.
         */
        if (thread_group_smp_restricted(tg)) {
            return PSET_AMP_E;
        }
        return PSET_AMP_P;
    case CLUSTER_TYPE_E:
        return PSET_AMP_E;
    case CLUSTER_TYPE_P:
        return PSET_AMP_P;
    }
#else /* __AMP__ */
    return PSET_SMP;
#endif /* __AMP__ */
}
#endif /* CONFIG_THREAD_GROUPS */
pset_cluster_type_t
recommended_pset_type(thread_t thread)
{
#if CONFIG_THREAD_GROUPS && __AMP__
    if (thread == THREAD_NULL) {
        return PSET_AMP_E;
    }

    if (thread->sched_flags & TH_SFLAG_ECORE_ONLY) {
        return PSET_AMP_E;
    } else if (thread->sched_flags & TH_SFLAG_PCORE_ONLY) {
        return PSET_AMP_P;
    }

    if (thread->base_pri <= MAXPRI_THROTTLE) {
        if (os_atomic_load(&sched_perfctl_policy_bg, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
            return PSET_AMP_E;
        }
    } else if (thread->base_pri <= BASEPRI_UTILITY) {
        if (os_atomic_load(&sched_perfctl_policy_util, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
            return PSET_AMP_E;
        }
    }

#if DEVELOPMENT || DEBUG
    extern bool system_ecore_only;
    extern processor_set_t pcore_set;
    if (system_ecore_only) {
        if (thread->task->pset_hint == pcore_set) {
            return PSET_AMP_P;
        }
        return PSET_AMP_E;
    }
#endif

    struct thread_group *tg = thread_group_get(thread);
    cluster_type_t recommendation = thread_group_recommendation(tg);
    switch (recommendation) {
    case CLUSTER_TYPE_SMP:
    default:
        if (thread->task == kernel_task) {
            return PSET_AMP_E;
        }
        return PSET_AMP_P;
    case CLUSTER_TYPE_E:
        return PSET_AMP_E;
    case CLUSTER_TYPE_P:
        return PSET_AMP_P;
    }
#else
    (void)thread;
    return PSET_SMP;
#endif
}
#if CONFIG_THREAD_GROUPS && __AMP__

void
sched_perfcontrol_inherit_recommendation_from_tg(perfcontrol_class_t perfctl_class, boolean_t inherit)
{
    sched_perfctl_class_policy_t sched_policy = inherit ? SCHED_PERFCTL_POLICY_FOLLOW_GROUP : SCHED_PERFCTL_POLICY_RESTRICT_E;

    KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_PERFCTL_POLICY_CHANGE) | DBG_FUNC_NONE, perfctl_class, sched_policy, 0, 0);

    switch (perfctl_class) {
    case PERFCONTROL_CLASS_UTILITY:
        os_atomic_store(&sched_perfctl_policy_util, sched_policy, relaxed);
        break;
    case PERFCONTROL_CLASS_BACKGROUND:
        os_atomic_store(&sched_perfctl_policy_bg, sched_policy, relaxed);
        break;
    default:
        panic("perfctl_class invalid");
        break;
    }
}

#elif defined(__arm64__)

/* Define a stub routine since this symbol is exported on all arm64 platforms */
void
sched_perfcontrol_inherit_recommendation_from_tg(__unused perfcontrol_class_t perfctl_class, __unused boolean_t inherit)
{
}

#endif /* defined(__arm64__) */