[apple/xnu.git] osfmk/kern/processor.c (xnu-7195.101.1)
1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58
59 /*
60 * processor.c: processor and processor_set manipulation routines.
61 */
62
63 #include <mach/boolean.h>
64 #include <mach/policy.h>
65 #include <mach/processor.h>
66 #include <mach/processor_info.h>
67 #include <mach/vm_param.h>
68 #include <kern/cpu_number.h>
69 #include <kern/host.h>
70 #include <kern/ipc_host.h>
71 #include <kern/ipc_tt.h>
72 #include <kern/kalloc.h>
73 #include <kern/machine.h>
74 #include <kern/misc_protos.h>
75 #include <kern/processor.h>
76 #include <kern/sched.h>
77 #include <kern/task.h>
78 #include <kern/thread.h>
79 #include <kern/timer.h>
80 #if KPERF
81 #include <kperf/kperf.h>
82 #endif /* KPERF */
83 #include <ipc/ipc_port.h>
84
85 #include <security/mac_mach_internal.h>
86
87 #if defined(CONFIG_XNUPOST)
88
89 #include <tests/xnupost.h>
90
91 #endif /* CONFIG_XNUPOST */
92
93 /*
94 * Exported interface
95 */
96 #include <mach/mach_host_server.h>
97 #include <mach/processor_set_server.h>
98
99 struct processor_set pset0;
100 struct pset_node pset_node0;
101
102 static SIMPLE_LOCK_DECLARE(pset_node_lock, 0);
103 LCK_GRP_DECLARE(pset_lck_grp, "pset");
104
105 queue_head_t tasks;
106 queue_head_t terminated_tasks; /* To be used ONLY for stackshot. */
107 queue_head_t corpse_tasks;
108 int tasks_count;
109 int terminated_tasks_count;
110 queue_head_t threads;
111 queue_head_t terminated_threads;
112 int threads_count;
113 int terminated_threads_count;
114 LCK_GRP_DECLARE(task_lck_grp, "task");
115 LCK_ATTR_DECLARE(task_lck_attr, 0, 0);
116 LCK_MTX_DECLARE_ATTR(tasks_threads_lock, &task_lck_grp, &task_lck_attr);
117 LCK_MTX_DECLARE_ATTR(tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
118
119 processor_t processor_list;
120 unsigned int processor_count;
121 static processor_t processor_list_tail;
122 SIMPLE_LOCK_DECLARE(processor_list_lock, 0);
123
124 uint32_t processor_avail_count;
125 uint32_t processor_avail_count_user;
126 uint32_t primary_processor_avail_count;
127 uint32_t primary_processor_avail_count_user;
128
129 int master_cpu = 0;
130
131 struct processor PERCPU_DATA(processor);
132 processor_t processor_array[MAX_SCHED_CPUS] = { 0 };
133 processor_set_t pset_array[MAX_PSETS] = { 0 };
134
135 static timer_call_func_t running_timer_funcs[] = {
136 [RUNNING_TIMER_QUANTUM] = thread_quantum_expire,
137 [RUNNING_TIMER_KPERF] = kperf_timer_expire,
138 };
139 static_assert(sizeof(running_timer_funcs) / sizeof(running_timer_funcs[0])
140 == RUNNING_TIMER_MAX, "missing running timer function");
141
142 #if defined(CONFIG_XNUPOST)
143 kern_return_t ipi_test(void);
144 extern void arm64_ipi_test(void);
145
146 kern_return_t
147 ipi_test()
148 {
149 #if __arm64__
150 processor_t p;
151
152 for (p = processor_list; p != NULL; p = p->processor_list) {
153 thread_bind(p);
154 thread_block(THREAD_CONTINUE_NULL);
155 kprintf("Running IPI test on cpu %d\n", p->cpu_id);
156 arm64_ipi_test();
157 }
158
159 /* unbind thread from specific cpu */
160 thread_bind(PROCESSOR_NULL);
161 thread_block(THREAD_CONTINUE_NULL);
162
163 T_PASS("Done running IPI tests");
164 #else
165 T_PASS("Unsupported platform. Not running IPI tests");
166
167 #endif /* __arm64__ */
168
169 return KERN_SUCCESS;
170 }
171 #endif /* defined(CONFIG_XNUPOST) */
172
173 int sched_enable_smt = 1;
174
175 void
176 processor_bootstrap(void)
177 {
178 pset_node0.psets = &pset0;
179 pset_init(&pset0, &pset_node0);
180
181 queue_init(&tasks);
182 queue_init(&terminated_tasks);
183 queue_init(&threads);
184 queue_init(&terminated_threads);
185 queue_init(&corpse_tasks);
186
187 processor_init(master_processor, master_cpu, &pset0);
188 }
189
190 /*
191 * Initialize the given processor for the cpu
192 * indicated by cpu_id, and assign to the
193 * specified processor set.
194 */
195 void
196 processor_init(
197 processor_t processor,
198 int cpu_id,
199 processor_set_t pset)
200 {
201 spl_t s;
202
203 assert(cpu_id < MAX_SCHED_CPUS);
204 processor->cpu_id = cpu_id;
205
206 if (processor != master_processor) {
207 /* Scheduler state for master_processor initialized in sched_init() */
208 SCHED(processor_init)(processor);
209 }
210
211 processor->state = PROCESSOR_OFF_LINE;
212 processor->active_thread = processor->startup_thread = processor->idle_thread = THREAD_NULL;
213 processor->processor_set = pset;
214 processor_state_update_idle(processor);
215 processor->starting_pri = MINPRI;
216 processor->quantum_end = UINT64_MAX;
217 processor->deadline = UINT64_MAX;
218 processor->first_timeslice = FALSE;
219 processor->processor_offlined = false;
220 processor->processor_primary = processor; /* no SMT relationship known at this point */
221 processor->processor_secondary = NULL;
222 processor->is_SMT = false;
223 processor->is_recommended = true;
224 processor->processor_self = IP_NULL;
225 processor->processor_list = NULL;
226 processor->must_idle = false;
227 processor->running_timers_active = false;
228 for (int i = 0; i < RUNNING_TIMER_MAX; i++) {
229 timer_call_setup(&processor->running_timers[i],
230 running_timer_funcs[i], processor);
231 running_timer_clear(processor, i);
232 }
233
234 timer_init(&processor->idle_state);
235 timer_init(&processor->system_state);
236 timer_init(&processor->user_state);
237
238 s = splsched();
239 pset_lock(pset);
240 bit_set(pset->cpu_bitmask, cpu_id);
241 bit_set(pset->recommended_bitmask, cpu_id);
242 bit_set(pset->primary_map, cpu_id);
243 bit_set(pset->cpu_state_map[PROCESSOR_OFF_LINE], cpu_id);
244 if (pset->cpu_set_count++ == 0) {
245 pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
246 } else {
247 pset->cpu_set_low = (cpu_id < pset->cpu_set_low)? cpu_id: pset->cpu_set_low;
248 pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id: pset->cpu_set_hi;
249 }
250 pset_unlock(pset);
251 splx(s);
252
253 simple_lock(&processor_list_lock, LCK_GRP_NULL);
254 if (processor_list == NULL) {
255 processor_list = processor;
256 } else {
257 processor_list_tail->processor_list = processor;
258 }
259 processor_list_tail = processor;
260 processor_count++;
261 processor_array[cpu_id] = processor;
262 simple_unlock(&processor_list_lock);
263 }
264
265 bool system_is_SMT = false;
266
267 void
268 processor_set_primary(
269 processor_t processor,
270 processor_t primary)
271 {
272 assert(processor->processor_primary == primary || processor->processor_primary == processor);
273 	/* Re-adjust the primary pointer for this (possibly) secondary processor */
274 processor->processor_primary = primary;
275
276 assert(primary->processor_secondary == NULL || primary->processor_secondary == processor);
277 if (primary != processor) {
278 /* Link primary to secondary, assumes a 2-way SMT model
279 * We'll need to move to a queue if any future architecture
280 * requires otherwise.
281 */
282 assert(processor->processor_secondary == NULL);
283 primary->processor_secondary = processor;
284 /* Mark both processors as SMT siblings */
285 primary->is_SMT = TRUE;
286 processor->is_SMT = TRUE;
287
288 if (!system_is_SMT) {
289 system_is_SMT = true;
290 }
291
292 processor_set_t pset = processor->processor_set;
293 spl_t s = splsched();
294 pset_lock(pset);
295 if (!pset->is_SMT) {
296 pset->is_SMT = true;
297 }
298 bit_clear(pset->primary_map, processor->cpu_id);
299 pset_unlock(pset);
300 splx(s);
301 }
302 }
303
304 processor_set_t
305 processor_pset(
306 processor_t processor)
307 {
308 return processor->processor_set;
309 }
310
311 #if CONFIG_SCHED_EDGE
312
313 cluster_type_t
314 pset_type_for_id(uint32_t cluster_id)
315 {
316 return pset_array[cluster_id]->pset_type;
317 }
318
319 /*
320 * Processor foreign threads
321 *
322 * With the Edge scheduler, each pset maintains a bitmap of processors running threads
323 * which are foreign to the pset/cluster. A thread is defined as foreign for a cluster
324  * if it is of a different type than its preferred cluster type (E/P). The bitmap should
325 * be updated every time a new thread is assigned to run on a processor.
326 *
327 * This bitmap allows the Edge scheduler to quickly find CPUs running foreign threads
328 * for rebalancing.
329 */
330 static void
331 processor_state_update_running_foreign(processor_t processor, thread_t thread)
332 {
333 cluster_type_t current_processor_type = pset_type_for_id(processor->processor_set->pset_cluster_id);
334 cluster_type_t thread_type = pset_type_for_id(sched_edge_thread_preferred_cluster(thread));
335
336 	/* Update the bitmap for the pset only for unbound non-RT threads. */
337 if ((processor->current_pri < BASEPRI_RTQUEUES) && (thread->bound_processor == PROCESSOR_NULL) && (current_processor_type != thread_type)) {
338 bit_set(processor->processor_set->cpu_running_foreign, processor->cpu_id);
339 } else {
340 bit_clear(processor->processor_set->cpu_running_foreign, processor->cpu_id);
341 }
342 }
343 #else /* CONFIG_SCHED_EDGE */
344 static void
345 processor_state_update_running_foreign(__unused processor_t processor, __unused thread_t thread)
346 {
347 }
348 #endif /* CONFIG_SCHED_EDGE */
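
/*
 * Illustrative sketch only (not part of xnu): the cpu_running_foreign bitmap
 * maintained above is meant to let a rebalancing pass find CPUs running
 * foreign threads quickly. Assuming nothing beyond a plain 64-bit mask and
 * the processor_array[] declared in this file, such a pass could look like:
 *
 *     uint64_t mask = pset->cpu_running_foreign;    // e.g. snapshotted under the pset lock
 *     while (mask != 0) {
 *         int cpu_id = __builtin_ctzll(mask);       // lowest set bit = CPU id
 *         mask &= mask - 1;                         // clear that bit
 *         processor_t candidate = processor_array[cpu_id];
 *         // candidate is running a thread whose preferred cluster type differs
 *     }
 *
 * The actual rebalancing policy lives in the Edge scheduler (CONFIG_SCHED_EDGE),
 * not in this file.
 */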
349
350 void
351 processor_state_update_idle(processor_t processor)
352 {
353 processor->current_pri = IDLEPRI;
354 processor->current_sfi_class = SFI_CLASS_KERNEL;
355 processor->current_recommended_pset_type = PSET_SMP;
356 #if CONFIG_THREAD_GROUPS
357 processor->current_thread_group = NULL;
358 #endif
359 processor->current_perfctl_class = PERFCONTROL_CLASS_IDLE;
360 processor->current_urgency = THREAD_URGENCY_NONE;
361 processor->current_is_NO_SMT = false;
362 processor->current_is_bound = false;
363 processor->current_is_eagerpreempt = false;
364 os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], TH_BUCKET_SCHED_MAX, relaxed);
365 }
366
367 void
368 processor_state_update_from_thread(processor_t processor, thread_t thread)
369 {
370 processor->current_pri = thread->sched_pri;
371 processor->current_sfi_class = thread->sfi_class;
372 processor->current_recommended_pset_type = recommended_pset_type(thread);
373 processor_state_update_running_foreign(processor, thread);
374 /* Since idle and bound threads are not tracked by the edge scheduler, ignore when those threads go on-core */
375 sched_bucket_t bucket = ((thread->state & TH_IDLE) || (thread->bound_processor != PROCESSOR_NULL)) ? TH_BUCKET_SCHED_MAX : thread->th_sched_bucket;
376 os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
377
378 #if CONFIG_THREAD_GROUPS
379 processor->current_thread_group = thread_group_get(thread);
380 #endif
381 processor->current_perfctl_class = thread_get_perfcontrol_class(thread);
382 processor->current_urgency = thread_get_urgency(thread, NULL, NULL);
383 processor->current_is_NO_SMT = thread_no_smt(thread);
384 processor->current_is_bound = thread->bound_processor != PROCESSOR_NULL;
385 processor->current_is_eagerpreempt = thread_is_eager_preempt(thread);
386 }
387
388 void
389 processor_state_update_explicit(processor_t processor, int pri, sfi_class_id_t sfi_class,
390 pset_cluster_type_t pset_type, perfcontrol_class_t perfctl_class, thread_urgency_t urgency, sched_bucket_t bucket)
391 {
392 processor->current_pri = pri;
393 processor->current_sfi_class = sfi_class;
394 processor->current_recommended_pset_type = pset_type;
395 processor->current_perfctl_class = perfctl_class;
396 processor->current_urgency = urgency;
397 os_atomic_store(&processor->processor_set->cpu_running_buckets[processor->cpu_id], bucket, relaxed);
398 }
399
400 pset_node_t
401 pset_node_root(void)
402 {
403 return &pset_node0;
404 }
405
406 processor_set_t
407 pset_create(
408 pset_node_t node)
409 {
410 /* some schedulers do not support multiple psets */
411 if (SCHED(multiple_psets_enabled) == FALSE) {
412 return processor_pset(master_processor);
413 }
414
415 processor_set_t *prev, pset = zalloc_permanent_type(struct processor_set);
416
417 if (pset != PROCESSOR_SET_NULL) {
418 pset_init(pset, node);
419
420 simple_lock(&pset_node_lock, LCK_GRP_NULL);
421
422 prev = &node->psets;
423 while (*prev != PROCESSOR_SET_NULL) {
424 prev = &(*prev)->pset_list;
425 }
426
427 *prev = pset;
428
429 simple_unlock(&pset_node_lock);
430 }
431
432 return pset;
433 }
434
435 /*
436 * Find processor set with specified cluster_id.
437 * Returns default_pset if not found.
438 */
439 processor_set_t
440 pset_find(
441 uint32_t cluster_id,
442 processor_set_t default_pset)
443 {
444 simple_lock(&pset_node_lock, LCK_GRP_NULL);
445 pset_node_t node = &pset_node0;
446 processor_set_t pset = NULL;
447
448 do {
449 pset = node->psets;
450 while (pset != NULL) {
451 if (pset->pset_cluster_id == cluster_id) {
452 break;
453 }
454 pset = pset->pset_list;
455 }
456 } while (pset == NULL && (node = node->node_list) != NULL);
457 simple_unlock(&pset_node_lock);
458 if (pset == NULL) {
459 return default_pset;
460 }
461 return pset;
462 }
463
464 #if !defined(RC_HIDE_XNU_FIRESTORM) && (MAX_CPU_CLUSTERS > 2)
465
466 /*
467 * Find the first processor_set for the given pset_cluster_type.
468 * Should be removed with rdar://57340304, as it's only
469 * useful for the workaround described in rdar://57306691.
470 */
471
472 processor_set_t
473 pset_find_first_by_cluster_type(
474 pset_cluster_type_t pset_cluster_type)
475 {
476 simple_lock(&pset_node_lock, LCK_GRP_NULL);
477 pset_node_t node = &pset_node0;
478 processor_set_t pset = NULL;
479
480 do {
481 pset = node->psets;
482 while (pset != NULL) {
483 if (pset->pset_cluster_type == pset_cluster_type) {
484 break;
485 }
486 pset = pset->pset_list;
487 }
488 } while (pset == NULL && (node = node->node_list) != NULL);
489 simple_unlock(&pset_node_lock);
490 return pset;
491 }
492
493 #endif /* !defined(RC_HIDE_XNU_FIRESTORM) && (MAX_CPU_CLUSTERS > 2) */
494
495 /*
496 * Initialize the given processor_set structure.
497 */
498 void
499 pset_init(
500 processor_set_t pset,
501 pset_node_t node)
502 {
503 static uint32_t pset_count = 0;
504
505 if (pset != &pset0) {
506 /*
507 * Scheduler runqueue initialization for non-boot psets.
508 * This initialization for pset0 happens in sched_init().
509 */
510 SCHED(pset_init)(pset);
511 SCHED(rt_init)(pset);
512 }
513
514 pset->online_processor_count = 0;
515 pset->load_average = 0;
516 bzero(&pset->pset_load_average, sizeof(pset->pset_load_average));
517 #if CONFIG_SCHED_EDGE
518 bzero(&pset->pset_execution_time, sizeof(pset->pset_execution_time));
519 #endif /* CONFIG_SCHED_EDGE */
520 pset->cpu_set_low = pset->cpu_set_hi = 0;
521 pset->cpu_set_count = 0;
522 pset->last_chosen = -1;
523 pset->cpu_bitmask = 0;
524 pset->recommended_bitmask = 0;
525 pset->primary_map = 0;
526 pset->realtime_map = 0;
527 pset->cpu_running_foreign = 0;
528
529 for (uint i = 0; i < PROCESSOR_STATE_LEN; i++) {
530 pset->cpu_state_map[i] = 0;
531 }
532 pset->pending_AST_URGENT_cpu_mask = 0;
533 pset->pending_AST_PREEMPT_cpu_mask = 0;
534 #if defined(CONFIG_SCHED_DEFERRED_AST)
535 pset->pending_deferred_AST_cpu_mask = 0;
536 #endif
537 pset->pending_spill_cpu_mask = 0;
538 pset_lock_init(pset);
539 pset->pset_self = IP_NULL;
540 pset->pset_name_self = IP_NULL;
541 pset->pset_list = PROCESSOR_SET_NULL;
542 pset->node = node;
543
544 /*
545 * The pset_cluster_type & pset_cluster_id for all psets
546 * on the platform are initialized as part of the SCHED(init).
547 * That works well for small cluster platforms; for large cluster
548 * count systems, it might be cleaner to do all the setup
549 * dynamically in SCHED(pset_init).
550 *
551 * <Edge Multi-cluster Support Needed>
552 */
553 pset->is_SMT = false;
554
555 simple_lock(&pset_node_lock, LCK_GRP_NULL);
556 pset->pset_id = pset_count++;
557 bit_set(node->pset_map, pset->pset_id);
558 simple_unlock(&pset_node_lock);
559
560 pset_array[pset->pset_id] = pset;
561 }
562
563 kern_return_t
564 processor_info_count(
565 processor_flavor_t flavor,
566 mach_msg_type_number_t *count)
567 {
568 switch (flavor) {
569 case PROCESSOR_BASIC_INFO:
570 *count = PROCESSOR_BASIC_INFO_COUNT;
571 break;
572
573 case PROCESSOR_CPU_LOAD_INFO:
574 *count = PROCESSOR_CPU_LOAD_INFO_COUNT;
575 break;
576
577 default:
578 return cpu_info_count(flavor, count);
579 }
580
581 return KERN_SUCCESS;
582 }
583
584
585 kern_return_t
586 processor_info(
587 processor_t processor,
588 processor_flavor_t flavor,
589 host_t *host,
590 processor_info_t info,
591 mach_msg_type_number_t *count)
592 {
593 int cpu_id, state;
594 kern_return_t result;
595
596 if (processor == PROCESSOR_NULL) {
597 return KERN_INVALID_ARGUMENT;
598 }
599
600 cpu_id = processor->cpu_id;
601
602 switch (flavor) {
603 case PROCESSOR_BASIC_INFO:
604 {
605 processor_basic_info_t basic_info;
606
607 if (*count < PROCESSOR_BASIC_INFO_COUNT) {
608 return KERN_FAILURE;
609 }
610
611 basic_info = (processor_basic_info_t) info;
612 basic_info->cpu_type = slot_type(cpu_id);
613 basic_info->cpu_subtype = slot_subtype(cpu_id);
614 state = processor->state;
615 if (state == PROCESSOR_OFF_LINE
616 #if defined(__x86_64__)
617 || !processor->is_recommended
618 #endif
619 ) {
620 basic_info->running = FALSE;
621 } else {
622 basic_info->running = TRUE;
623 }
624 basic_info->slot_num = cpu_id;
625 if (processor == master_processor) {
626 basic_info->is_master = TRUE;
627 } else {
628 basic_info->is_master = FALSE;
629 }
630
631 *count = PROCESSOR_BASIC_INFO_COUNT;
632 *host = &realhost;
633
634 return KERN_SUCCESS;
635 }
636
637 case PROCESSOR_CPU_LOAD_INFO:
638 {
639 processor_cpu_load_info_t cpu_load_info;
640 timer_t idle_state;
641 uint64_t idle_time_snapshot1, idle_time_snapshot2;
642 uint64_t idle_time_tstamp1, idle_time_tstamp2;
643
644 /*
645 * We capture the accumulated idle time twice over
646 * the course of this function, as well as the timestamps
647  * when each was last updated. Since these are
648 * all done using non-atomic racy mechanisms, the
649 * most we can infer is whether values are stable.
650 * timer_grab() is the only function that can be
651 * used reliably on another processor's per-processor
652 * data.
653 */
654
655 if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT) {
656 return KERN_FAILURE;
657 }
658
659 cpu_load_info = (processor_cpu_load_info_t) info;
660 if (precise_user_kernel_time) {
661 cpu_load_info->cpu_ticks[CPU_STATE_USER] =
662 (uint32_t)(timer_grab(&processor->user_state) / hz_tick_interval);
663 cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] =
664 (uint32_t)(timer_grab(&processor->system_state) / hz_tick_interval);
665 } else {
666 uint64_t tval = timer_grab(&processor->user_state) +
667 timer_grab(&processor->system_state);
668
669 cpu_load_info->cpu_ticks[CPU_STATE_USER] = (uint32_t)(tval / hz_tick_interval);
670 cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] = 0;
671 }
672
673 idle_state = &processor->idle_state;
674 idle_time_snapshot1 = timer_grab(idle_state);
675 idle_time_tstamp1 = idle_state->tstamp;
676
677 /*
678 * Idle processors are not continually updating their
679 * per-processor idle timer, so it may be extremely
680 * out of date, resulting in an over-representation
681 * of non-idle time between two measurement
682 * intervals by e.g. top(1). If we are non-idle, or
683 * have evidence that the timer is being updated
684 * concurrently, we consider its value up-to-date.
685 */
686 if (processor->current_state != idle_state) {
687 cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
688 (uint32_t)(idle_time_snapshot1 / hz_tick_interval);
689 } else if ((idle_time_snapshot1 != (idle_time_snapshot2 = timer_grab(idle_state))) ||
690 (idle_time_tstamp1 != (idle_time_tstamp2 = idle_state->tstamp))) {
691 /* Idle timer is being updated concurrently, second stamp is good enough */
692 cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
693 (uint32_t)(idle_time_snapshot2 / hz_tick_interval);
694 } else {
695 /*
696 * Idle timer may be very stale. Fortunately we have established
697 * that idle_time_snapshot1 and idle_time_tstamp1 are unchanging
698 */
699 idle_time_snapshot1 += mach_absolute_time() - idle_time_tstamp1;
700
701 cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
702 (uint32_t)(idle_time_snapshot1 / hz_tick_interval);
703 }
704
705 cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0;
706
707 *count = PROCESSOR_CPU_LOAD_INFO_COUNT;
708 *host = &realhost;
709
710 return KERN_SUCCESS;
711 }
712
713 default:
714 result = cpu_info(flavor, cpu_id, info, count);
715 if (result == KERN_SUCCESS) {
716 *host = &realhost;
717 }
718
719 return result;
720 }
721 }
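
/*
 * Usage note (illustrative, not part of this file): user space typically reaches
 * the PROCESSOR_CPU_LOAD_INFO flavor handled above through the unprivileged
 * host_processor_info() MIG routine rather than by holding per-processor ports.
 * A minimal sketch of that caller side:
 *
 *     natural_t cpu_count;
 *     processor_info_array_t info;
 *     mach_msg_type_number_t info_cnt;
 *
 *     kern_return_t kr = host_processor_info(mach_host_self(),
 *         PROCESSOR_CPU_LOAD_INFO, &cpu_count, &info, &info_cnt);
 *     if (kr == KERN_SUCCESS) {
 *         processor_cpu_load_info_t load = (processor_cpu_load_info_t)info;
 *         // load[i].cpu_ticks[CPU_STATE_USER] etc. for each of cpu_count CPUs
 *         vm_deallocate(mach_task_self(), (vm_address_t)info,
 *             info_cnt * sizeof(integer_t));    // the array is returned out-of-line
 *     }
 */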
722
723 kern_return_t
724 processor_start(
725 processor_t processor)
726 {
727 processor_set_t pset;
728 thread_t thread;
729 kern_return_t result;
730 spl_t s;
731
732 if (processor == PROCESSOR_NULL || processor->processor_set == PROCESSOR_SET_NULL) {
733 return KERN_INVALID_ARGUMENT;
734 }
735
736 if (processor == master_processor) {
737 processor_t prev;
738
739 prev = thread_bind(processor);
740 thread_block(THREAD_CONTINUE_NULL);
741
742 result = cpu_start(processor->cpu_id);
743
744 thread_bind(prev);
745
746 return result;
747 }
748
749 bool scheduler_disable = false;
750
751 if ((processor->processor_primary != processor) && (sched_enable_smt == 0)) {
752 if (cpu_can_exit(processor->cpu_id)) {
753 return KERN_SUCCESS;
754 }
755 /*
756 * This secondary SMT processor must start in order to service interrupts,
757 * so instead it will be disabled at the scheduler level.
758 */
759 scheduler_disable = true;
760 }
761
762 ml_cpu_begin_state_transition(processor->cpu_id);
763 s = splsched();
764 pset = processor->processor_set;
765 pset_lock(pset);
766 if (processor->state != PROCESSOR_OFF_LINE) {
767 pset_unlock(pset);
768 splx(s);
769 ml_cpu_end_state_transition(processor->cpu_id);
770
771 return KERN_FAILURE;
772 }
773
774 pset_update_processor_state(pset, processor, PROCESSOR_START);
775 pset_unlock(pset);
776 splx(s);
777
778 /*
779 * Create the idle processor thread.
780 */
781 if (processor->idle_thread == THREAD_NULL) {
782 result = idle_thread_create(processor);
783 if (result != KERN_SUCCESS) {
784 s = splsched();
785 pset_lock(pset);
786 pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
787 pset_unlock(pset);
788 splx(s);
789 ml_cpu_end_state_transition(processor->cpu_id);
790
791 return result;
792 }
793 }
794
795 /*
796 * If there is no active thread, the processor
797 * has never been started. Create a dedicated
798  * startup thread.
799 */
800 if (processor->active_thread == THREAD_NULL &&
801 processor->startup_thread == THREAD_NULL) {
802 result = kernel_thread_create(processor_start_thread, NULL, MAXPRI_KERNEL, &thread);
803 if (result != KERN_SUCCESS) {
804 s = splsched();
805 pset_lock(pset);
806 pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
807 pset_unlock(pset);
808 splx(s);
809 ml_cpu_end_state_transition(processor->cpu_id);
810
811 return result;
812 }
813
814 s = splsched();
815 thread_lock(thread);
816 thread->bound_processor = processor;
817 processor->startup_thread = thread;
818 thread->state = TH_RUN;
819 thread->last_made_runnable_time = mach_absolute_time();
820 thread_unlock(thread);
821 splx(s);
822
823 thread_deallocate(thread);
824 }
825
826 if (processor->processor_self == IP_NULL) {
827 ipc_processor_init(processor);
828 }
829
830 ml_broadcast_cpu_event(CPU_BOOT_REQUESTED, processor->cpu_id);
831 result = cpu_start(processor->cpu_id);
832 if (result != KERN_SUCCESS) {
833 s = splsched();
834 pset_lock(pset);
835 pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
836 pset_unlock(pset);
837 splx(s);
838 ml_cpu_end_state_transition(processor->cpu_id);
839
840 return result;
841 }
842 if (scheduler_disable) {
843 assert(processor->processor_primary != processor);
844 sched_processor_enable(processor, FALSE);
845 }
846
847 ipc_processor_enable(processor);
848 ml_cpu_end_state_transition(processor->cpu_id);
849 ml_broadcast_cpu_event(CPU_ACTIVE, processor->cpu_id);
850
851 return KERN_SUCCESS;
852 }
853
854
855 kern_return_t
856 processor_exit(
857 processor_t processor)
858 {
859 if (processor == PROCESSOR_NULL) {
860 return KERN_INVALID_ARGUMENT;
861 }
862
863 return processor_shutdown(processor);
864 }
865
866
867 kern_return_t
868 processor_start_from_user(
869 processor_t processor)
870 {
871 kern_return_t ret;
872
873 if (processor == PROCESSOR_NULL) {
874 return KERN_INVALID_ARGUMENT;
875 }
876
877 if (!cpu_can_exit(processor->cpu_id)) {
878 ret = sched_processor_enable(processor, TRUE);
879 } else {
880 ret = processor_start(processor);
881 }
882
883 return ret;
884 }
885
886 kern_return_t
887 processor_exit_from_user(
888 processor_t processor)
889 {
890 kern_return_t ret;
891
892 if (processor == PROCESSOR_NULL) {
893 return KERN_INVALID_ARGUMENT;
894 }
895
896 if (!cpu_can_exit(processor->cpu_id)) {
897 ret = sched_processor_enable(processor, FALSE);
898 } else {
899 ret = processor_shutdown(processor);
900 }
901
902 return ret;
903 }
904
905 kern_return_t
906 enable_smt_processors(bool enable)
907 {
908 if (machine_info.logical_cpu_max == machine_info.physical_cpu_max) {
909 /* Not an SMT system */
910 return KERN_INVALID_ARGUMENT;
911 }
912
913 int ncpus = machine_info.logical_cpu_max;
914
915 for (int i = 1; i < ncpus; i++) {
916 processor_t processor = processor_array[i];
917
918 if (processor->processor_primary != processor) {
919 if (enable) {
920 processor_start_from_user(processor);
921 } else { /* Disable */
922 processor_exit_from_user(processor);
923 }
924 }
925 }
926
927 #define BSD_HOST 1
928 host_basic_info_data_t hinfo;
929 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
930 kern_return_t kret = host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
931 if (kret != KERN_SUCCESS) {
932 return kret;
933 }
934
935 if (enable && (hinfo.logical_cpu != hinfo.logical_cpu_max)) {
936 return KERN_FAILURE;
937 }
938
939 if (!enable && (hinfo.logical_cpu != hinfo.physical_cpu)) {
940 return KERN_FAILURE;
941 }
942
943 return KERN_SUCCESS;
944 }
945
946 kern_return_t
947 processor_control(
948 processor_t processor,
949 processor_info_t info,
950 mach_msg_type_number_t count)
951 {
952 if (processor == PROCESSOR_NULL) {
953 return KERN_INVALID_ARGUMENT;
954 }
955
956 return cpu_control(processor->cpu_id, info, count);
957 }
958
959 kern_return_t
960 processor_set_create(
961 __unused host_t host,
962 __unused processor_set_t *new_set,
963 __unused processor_set_t *new_name)
964 {
965 return KERN_FAILURE;
966 }
967
968 kern_return_t
969 processor_set_destroy(
970 __unused processor_set_t pset)
971 {
972 return KERN_FAILURE;
973 }
974
975 kern_return_t
976 processor_get_assignment(
977 processor_t processor,
978 processor_set_t *pset)
979 {
980 int state;
981
982 if (processor == PROCESSOR_NULL) {
983 return KERN_INVALID_ARGUMENT;
984 }
985
986 state = processor->state;
987 if (state == PROCESSOR_SHUTDOWN || state == PROCESSOR_OFF_LINE) {
988 return KERN_FAILURE;
989 }
990
991 *pset = &pset0;
992
993 return KERN_SUCCESS;
994 }
995
996 kern_return_t
997 processor_set_info(
998 processor_set_t pset,
999 int flavor,
1000 host_t *host,
1001 processor_set_info_t info,
1002 mach_msg_type_number_t *count)
1003 {
1004 if (pset == PROCESSOR_SET_NULL) {
1005 return KERN_INVALID_ARGUMENT;
1006 }
1007
1008 if (flavor == PROCESSOR_SET_BASIC_INFO) {
1009 processor_set_basic_info_t basic_info;
1010
1011 if (*count < PROCESSOR_SET_BASIC_INFO_COUNT) {
1012 return KERN_FAILURE;
1013 }
1014
1015 basic_info = (processor_set_basic_info_t) info;
1016 #if defined(__x86_64__)
1017 basic_info->processor_count = processor_avail_count_user;
1018 #else
1019 basic_info->processor_count = processor_avail_count;
1020 #endif
1021 basic_info->default_policy = POLICY_TIMESHARE;
1022
1023 *count = PROCESSOR_SET_BASIC_INFO_COUNT;
1024 *host = &realhost;
1025 return KERN_SUCCESS;
1026 } else if (flavor == PROCESSOR_SET_TIMESHARE_DEFAULT) {
1027 policy_timeshare_base_t ts_base;
1028
1029 if (*count < POLICY_TIMESHARE_BASE_COUNT) {
1030 return KERN_FAILURE;
1031 }
1032
1033 ts_base = (policy_timeshare_base_t) info;
1034 ts_base->base_priority = BASEPRI_DEFAULT;
1035
1036 *count = POLICY_TIMESHARE_BASE_COUNT;
1037 *host = &realhost;
1038 return KERN_SUCCESS;
1039 } else if (flavor == PROCESSOR_SET_FIFO_DEFAULT) {
1040 policy_fifo_base_t fifo_base;
1041
1042 if (*count < POLICY_FIFO_BASE_COUNT) {
1043 return KERN_FAILURE;
1044 }
1045
1046 fifo_base = (policy_fifo_base_t) info;
1047 fifo_base->base_priority = BASEPRI_DEFAULT;
1048
1049 *count = POLICY_FIFO_BASE_COUNT;
1050 *host = &realhost;
1051 return KERN_SUCCESS;
1052 } else if (flavor == PROCESSOR_SET_RR_DEFAULT) {
1053 policy_rr_base_t rr_base;
1054
1055 if (*count < POLICY_RR_BASE_COUNT) {
1056 return KERN_FAILURE;
1057 }
1058
1059 rr_base = (policy_rr_base_t) info;
1060 rr_base->base_priority = BASEPRI_DEFAULT;
1061 rr_base->quantum = 1;
1062
1063 *count = POLICY_RR_BASE_COUNT;
1064 *host = &realhost;
1065 return KERN_SUCCESS;
1066 } else if (flavor == PROCESSOR_SET_TIMESHARE_LIMITS) {
1067 policy_timeshare_limit_t ts_limit;
1068
1069 if (*count < POLICY_TIMESHARE_LIMIT_COUNT) {
1070 return KERN_FAILURE;
1071 }
1072
1073 ts_limit = (policy_timeshare_limit_t) info;
1074 ts_limit->max_priority = MAXPRI_KERNEL;
1075
1076 *count = POLICY_TIMESHARE_LIMIT_COUNT;
1077 *host = &realhost;
1078 return KERN_SUCCESS;
1079 } else if (flavor == PROCESSOR_SET_FIFO_LIMITS) {
1080 policy_fifo_limit_t fifo_limit;
1081
1082 if (*count < POLICY_FIFO_LIMIT_COUNT) {
1083 return KERN_FAILURE;
1084 }
1085
1086 fifo_limit = (policy_fifo_limit_t) info;
1087 fifo_limit->max_priority = MAXPRI_KERNEL;
1088
1089 *count = POLICY_FIFO_LIMIT_COUNT;
1090 *host = &realhost;
1091 return KERN_SUCCESS;
1092 } else if (flavor == PROCESSOR_SET_RR_LIMITS) {
1093 policy_rr_limit_t rr_limit;
1094
1095 if (*count < POLICY_RR_LIMIT_COUNT) {
1096 return KERN_FAILURE;
1097 }
1098
1099 rr_limit = (policy_rr_limit_t) info;
1100 rr_limit->max_priority = MAXPRI_KERNEL;
1101
1102 *count = POLICY_RR_LIMIT_COUNT;
1103 *host = &realhost;
1104 return KERN_SUCCESS;
1105 } else if (flavor == PROCESSOR_SET_ENABLED_POLICIES) {
1106 int *enabled;
1107
1108 if (*count < (sizeof(*enabled) / sizeof(int))) {
1109 return KERN_FAILURE;
1110 }
1111
1112 enabled = (int *) info;
1113 *enabled = POLICY_TIMESHARE | POLICY_RR | POLICY_FIFO;
1114
1115 *count = sizeof(*enabled) / sizeof(int);
1116 *host = &realhost;
1117 return KERN_SUCCESS;
1118 }
1119
1120
1121 *host = HOST_NULL;
1122 return KERN_INVALID_ARGUMENT;
1123 }
1124
1125 /*
1126 * processor_set_statistics
1127 *
1128 * Returns scheduling statistics for a processor set.
1129 */
1130 kern_return_t
1131 processor_set_statistics(
1132 processor_set_t pset,
1133 int flavor,
1134 processor_set_info_t info,
1135 mach_msg_type_number_t *count)
1136 {
1137 if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
1138 return KERN_INVALID_PROCESSOR_SET;
1139 }
1140
1141 if (flavor == PROCESSOR_SET_LOAD_INFO) {
1142 processor_set_load_info_t load_info;
1143
1144 if (*count < PROCESSOR_SET_LOAD_INFO_COUNT) {
1145 return KERN_FAILURE;
1146 }
1147
1148 load_info = (processor_set_load_info_t) info;
1149
1150 load_info->mach_factor = sched_mach_factor;
1151 load_info->load_average = sched_load_average;
1152
1153 load_info->task_count = tasks_count;
1154 load_info->thread_count = threads_count;
1155
1156 *count = PROCESSOR_SET_LOAD_INFO_COUNT;
1157 return KERN_SUCCESS;
1158 }
1159
1160 return KERN_INVALID_ARGUMENT;
1161 }
1162
1163 /*
1164 * processor_set_max_priority:
1165 *
1166 * Specify max priority permitted on processor set. This affects
1167 * newly created and assigned threads. Optionally change existing
1168 * ones.
1169 */
1170 kern_return_t
1171 processor_set_max_priority(
1172 __unused processor_set_t pset,
1173 __unused int max_priority,
1174 __unused boolean_t change_threads)
1175 {
1176 return KERN_INVALID_ARGUMENT;
1177 }
1178
1179 /*
1180 * processor_set_policy_enable:
1181 *
1182 * Allow indicated policy on processor set.
1183 */
1184
1185 kern_return_t
1186 processor_set_policy_enable(
1187 __unused processor_set_t pset,
1188 __unused int policy)
1189 {
1190 return KERN_INVALID_ARGUMENT;
1191 }
1192
1193 /*
1194 * processor_set_policy_disable:
1195 *
1196 * Forbid indicated policy on processor set. Time sharing cannot
1197 * be forbidden.
1198 */
1199 kern_return_t
1200 processor_set_policy_disable(
1201 __unused processor_set_t pset,
1202 __unused int policy,
1203 __unused boolean_t change_threads)
1204 {
1205 return KERN_INVALID_ARGUMENT;
1206 }
1207
1208 /*
1209 * processor_set_things:
1210 *
1211 * Common internals for processor_set_{threads,tasks}
1212 */
1213 static kern_return_t
1214 processor_set_things(
1215 processor_set_t pset,
1216 void **thing_list,
1217 mach_msg_type_number_t *count,
1218 int type,
1219 mach_task_flavor_t flavor)
1220 {
1221 unsigned int i;
1222 task_t task;
1223 thread_t thread;
1224
1225 task_t *task_list;
1226 unsigned int actual_tasks;
1227 vm_size_t task_size, task_size_needed;
1228
1229 thread_t *thread_list;
1230 unsigned int actual_threads;
1231 vm_size_t thread_size, thread_size_needed;
1232
1233 void *addr, *newaddr;
1234 vm_size_t size, size_needed;
1235
1236 if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
1237 return KERN_INVALID_ARGUMENT;
1238 }
1239
1240 task_size = 0;
1241 task_size_needed = 0;
1242 task_list = NULL;
1243 actual_tasks = 0;
1244
1245 thread_size = 0;
1246 thread_size_needed = 0;
1247 thread_list = NULL;
1248 actual_threads = 0;
1249
1250 for (;;) {
1251 lck_mtx_lock(&tasks_threads_lock);
1252
1253 /* do we have the memory we need? */
1254 if (type == PSET_THING_THREAD) {
1255 thread_size_needed = threads_count * sizeof(void *);
1256 }
1257 #if !CONFIG_MACF
1258 else
1259 #endif
1260 task_size_needed = tasks_count * sizeof(void *);
1261
1262 if (task_size_needed <= task_size &&
1263 thread_size_needed <= thread_size) {
1264 break;
1265 }
1266
1267 /* unlock and allocate more memory */
1268 lck_mtx_unlock(&tasks_threads_lock);
1269
1270 /* grow task array */
1271 if (task_size_needed > task_size) {
1272 if (task_size != 0) {
1273 kfree(task_list, task_size);
1274 }
1275
1276 assert(task_size_needed > 0);
1277 task_size = task_size_needed;
1278
1279 task_list = (task_t *)kalloc(task_size);
1280 if (task_list == NULL) {
1281 if (thread_size != 0) {
1282 kfree(thread_list, thread_size);
1283 }
1284 return KERN_RESOURCE_SHORTAGE;
1285 }
1286 }
1287
1288 /* grow thread array */
1289 if (thread_size_needed > thread_size) {
1290 if (thread_size != 0) {
1291 kfree(thread_list, thread_size);
1292 }
1293
1294 assert(thread_size_needed > 0);
1295 thread_size = thread_size_needed;
1296
1297 thread_list = (thread_t *)kalloc(thread_size);
1298 if (thread_list == 0) {
1299 if (task_size != 0) {
1300 kfree(task_list, task_size);
1301 }
1302 return KERN_RESOURCE_SHORTAGE;
1303 }
1304 }
1305 }
1306
1307 /* OK, have memory and the list locked */
1308
1309 /* If we need it, get the thread list */
1310 if (type == PSET_THING_THREAD) {
1311 for (thread = (thread_t)queue_first(&threads);
1312 !queue_end(&threads, (queue_entry_t)thread);
1313 thread = (thread_t)queue_next(&thread->threads)) {
1314 #if defined(SECURE_KERNEL)
1315 if (thread->task != kernel_task) {
1316 #endif
1317 thread_reference_internal(thread);
1318 thread_list[actual_threads++] = thread;
1319 #if defined(SECURE_KERNEL)
1320 }
1321 #endif
1322 }
1323 }
1324 #if !CONFIG_MACF
1325 else {
1326 #endif
1327 /* get a list of the tasks */
1328 for (task = (task_t)queue_first(&tasks);
1329 !queue_end(&tasks, (queue_entry_t)task);
1330 task = (task_t)queue_next(&task->tasks)) {
1331 #if defined(SECURE_KERNEL)
1332 if (task != kernel_task) {
1333 #endif
1334 task_reference_internal(task);
1335 task_list[actual_tasks++] = task;
1336 #if defined(SECURE_KERNEL)
1337 }
1338 #endif
1339 }
1340 #if !CONFIG_MACF
1341 }
1342 #endif
1343
1344 lck_mtx_unlock(&tasks_threads_lock);
1345
1346 #if CONFIG_MACF
1347 unsigned int j, used;
1348
1349 /* for each task, make sure we are allowed to examine it */
1350 for (i = used = 0; i < actual_tasks; i++) {
1351 if (mac_task_check_expose_task(task_list[i], flavor)) {
1352 task_deallocate(task_list[i]);
1353 continue;
1354 }
1355 task_list[used++] = task_list[i];
1356 }
1357 actual_tasks = used;
1358 task_size_needed = actual_tasks * sizeof(void *);
1359
1360 if (type == PSET_THING_THREAD) {
1361 		/* for each thread (if any), make sure its task is in the allowed list */
1362 for (i = used = 0; i < actual_threads; i++) {
1363 boolean_t found_task = FALSE;
1364
1365 task = thread_list[i]->task;
1366 for (j = 0; j < actual_tasks; j++) {
1367 if (task_list[j] == task) {
1368 found_task = TRUE;
1369 break;
1370 }
1371 }
1372 if (found_task) {
1373 thread_list[used++] = thread_list[i];
1374 } else {
1375 thread_deallocate(thread_list[i]);
1376 }
1377 }
1378 actual_threads = used;
1379 thread_size_needed = actual_threads * sizeof(void *);
1380
1381 /* done with the task list */
1382 for (i = 0; i < actual_tasks; i++) {
1383 task_deallocate(task_list[i]);
1384 }
1385 kfree(task_list, task_size);
1386 task_size = 0;
1387 actual_tasks = 0;
1388 task_list = NULL;
1389 }
1390 #endif
1391
1392 if (type == PSET_THING_THREAD) {
1393 if (actual_threads == 0) {
1394 /* no threads available to return */
1395 assert(task_size == 0);
1396 if (thread_size != 0) {
1397 kfree(thread_list, thread_size);
1398 }
1399 *thing_list = NULL;
1400 *count = 0;
1401 return KERN_SUCCESS;
1402 }
1403 size_needed = actual_threads * sizeof(void *);
1404 size = thread_size;
1405 addr = thread_list;
1406 } else {
1407 if (actual_tasks == 0) {
1408 /* no tasks available to return */
1409 assert(thread_size == 0);
1410 if (task_size != 0) {
1411 kfree(task_list, task_size);
1412 }
1413 *thing_list = NULL;
1414 *count = 0;
1415 return KERN_SUCCESS;
1416 }
1417 size_needed = actual_tasks * sizeof(void *);
1418 size = task_size;
1419 addr = task_list;
1420 }
1421
1422 /* if we allocated too much, must copy */
1423 if (size_needed < size) {
1424 newaddr = kalloc(size_needed);
1425 if (newaddr == 0) {
1426 for (i = 0; i < actual_tasks; i++) {
1427 if (type == PSET_THING_THREAD) {
1428 thread_deallocate(thread_list[i]);
1429 } else {
1430 task_deallocate(task_list[i]);
1431 }
1432 }
1433 if (size) {
1434 kfree(addr, size);
1435 }
1436 return KERN_RESOURCE_SHORTAGE;
1437 }
1438
1439 bcopy((void *) addr, (void *) newaddr, size_needed);
1440 kfree(addr, size);
1441
1442 addr = newaddr;
1443 size = size_needed;
1444 }
1445
1446 *thing_list = (void **)addr;
1447 *count = (unsigned int)size / sizeof(void *);
1448
1449 return KERN_SUCCESS;
1450 }
1451
1452 /*
1453 * processor_set_tasks:
1454 *
1455 * List all tasks in the processor set.
1456 */
1457 static kern_return_t
1458 processor_set_tasks_internal(
1459 processor_set_t pset,
1460 task_array_t *task_list,
1461 mach_msg_type_number_t *count,
1462 mach_task_flavor_t flavor)
1463 {
1464 kern_return_t ret;
1465 mach_msg_type_number_t i;
1466
1467 ret = processor_set_things(pset, (void **)task_list, count, PSET_THING_TASK, flavor);
1468 if (ret != KERN_SUCCESS) {
1469 return ret;
1470 }
1471
1472 /* do the conversion that Mig should handle */
1473 switch (flavor) {
1474 case TASK_FLAVOR_CONTROL:
1475 for (i = 0; i < *count; i++) {
1476 if ((*task_list)[i] == current_task()) {
1477 /* if current_task(), return pinned port */
1478 (*task_list)[i] = (task_t)convert_task_to_port_pinned((*task_list)[i]);
1479 } else {
1480 (*task_list)[i] = (task_t)convert_task_to_port((*task_list)[i]);
1481 }
1482 }
1483 break;
1484 case TASK_FLAVOR_READ:
1485 for (i = 0; i < *count; i++) {
1486 (*task_list)[i] = (task_t)convert_task_read_to_port((*task_list)[i]);
1487 }
1488 break;
1489 case TASK_FLAVOR_INSPECT:
1490 for (i = 0; i < *count; i++) {
1491 (*task_list)[i] = (task_t)convert_task_inspect_to_port((*task_list)[i]);
1492 }
1493 break;
1494 case TASK_FLAVOR_NAME:
1495 for (i = 0; i < *count; i++) {
1496 (*task_list)[i] = (task_t)convert_task_name_to_port((*task_list)[i]);
1497 }
1498 break;
1499 default:
1500 return KERN_INVALID_ARGUMENT;
1501 }
1502
1503 return KERN_SUCCESS;
1504 }
1505
1506 kern_return_t
1507 processor_set_tasks(
1508 processor_set_t pset,
1509 task_array_t *task_list,
1510 mach_msg_type_number_t *count)
1511 {
1512 return processor_set_tasks_internal(pset, task_list, count, TASK_FLAVOR_CONTROL);
1513 }
1514
1515 /*
1516 * processor_set_tasks_with_flavor:
1517 *
1518 * Based on flavor, return task/inspect/read port to all tasks in the processor set.
1519 */
1520 kern_return_t
1521 processor_set_tasks_with_flavor(
1522 processor_set_t pset,
1523 mach_task_flavor_t flavor,
1524 task_array_t *task_list,
1525 mach_msg_type_number_t *count)
1526 {
1527 switch (flavor) {
1528 case TASK_FLAVOR_CONTROL:
1529 case TASK_FLAVOR_READ:
1530 case TASK_FLAVOR_INSPECT:
1531 case TASK_FLAVOR_NAME:
1532 return processor_set_tasks_internal(pset, task_list, count, flavor);
1533 default:
1534 return KERN_INVALID_ARGUMENT;
1535 }
1536 }
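
/*
 * Usage note (illustrative, not part of this file): the classic user-space path to
 * processor_set_tasks() goes through the default pset name port and the host
 * privileged port, which is why the call is effectively limited to privileged
 * callers:
 *
 *     processor_set_name_t name;
 *     processor_set_t pset;
 *     task_array_t task_ports;
 *     mach_msg_type_number_t task_count;
 *
 *     processor_set_default(mach_host_self(), &name);
 *     host_processor_set_priv(host_priv, name, &pset);        // host_priv: privileged host port
 *     processor_set_tasks(pset, &task_ports, &task_count);    // control ports for all tasks
 *
 * Error handling and port/array deallocation are omitted; host_priv stands in for a
 * privileged host port obtained by a sufficiently entitled caller.
 */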
1537
1538 /*
1539 * processor_set_threads:
1540 *
1541 * List all threads in the processor set.
1542 */
1543 #if defined(SECURE_KERNEL)
1544 kern_return_t
1545 processor_set_threads(
1546 __unused processor_set_t pset,
1547 __unused thread_array_t *thread_list,
1548 __unused mach_msg_type_number_t *count)
1549 {
1550 return KERN_FAILURE;
1551 }
1552 #elif !defined(XNU_TARGET_OS_OSX)
1553 kern_return_t
1554 processor_set_threads(
1555 __unused processor_set_t pset,
1556 __unused thread_array_t *thread_list,
1557 __unused mach_msg_type_number_t *count)
1558 {
1559 return KERN_NOT_SUPPORTED;
1560 }
1561 #else
1562 kern_return_t
1563 processor_set_threads(
1564 processor_set_t pset,
1565 thread_array_t *thread_list,
1566 mach_msg_type_number_t *count)
1567 {
1568 kern_return_t ret;
1569 mach_msg_type_number_t i;
1570
1571 ret = processor_set_things(pset, (void **)thread_list, count, PSET_THING_THREAD, TASK_FLAVOR_CONTROL);
1572 if (ret != KERN_SUCCESS) {
1573 return ret;
1574 }
1575
1576 /* do the conversion that Mig should handle */
1577 for (i = 0; i < *count; i++) {
1578 (*thread_list)[i] = (thread_t)convert_thread_to_port((*thread_list)[i]);
1579 }
1580 return KERN_SUCCESS;
1581 }
1582 #endif
1583
1584 /*
1585 * processor_set_policy_control
1586 *
1587 * Controls the scheduling attributes governing the processor set.
1588 * Allows control of enabled policies, and per-policy base and limit
1589 * priorities.
1590 */
1591 kern_return_t
1592 processor_set_policy_control(
1593 __unused processor_set_t pset,
1594 __unused int flavor,
1595 __unused processor_set_info_t policy_info,
1596 __unused mach_msg_type_number_t count,
1597 __unused boolean_t change)
1598 {
1599 return KERN_INVALID_ARGUMENT;
1600 }
1601
1602 #undef pset_deallocate
1603 void pset_deallocate(processor_set_t pset);
1604 void
1605 pset_deallocate(
1606 __unused processor_set_t pset)
1607 {
1608 return;
1609 }
1610
1611 #undef pset_reference
1612 void pset_reference(processor_set_t pset);
1613 void
1614 pset_reference(
1615 __unused processor_set_t pset)
1616 {
1617 return;
1618 }
1619
1620 #if CONFIG_THREAD_GROUPS
1621
1622 pset_cluster_type_t
1623 thread_group_pset_recommendation(__unused struct thread_group *tg, __unused cluster_type_t recommendation)
1624 {
1625 #if __AMP__
1626 switch (recommendation) {
1627 case CLUSTER_TYPE_SMP:
1628 default:
1629 /*
1630 * In case of SMP recommendations, check if the thread
1631 * group has special flags which restrict it to the E
1632 * cluster.
1633 */
1634 if (thread_group_smp_restricted(tg)) {
1635 return PSET_AMP_E;
1636 }
1637 return PSET_AMP_P;
1638 case CLUSTER_TYPE_E:
1639 return PSET_AMP_E;
1640 case CLUSTER_TYPE_P:
1641 return PSET_AMP_P;
1642 }
1643 #else /* __AMP__ */
1644 return PSET_SMP;
1645 #endif /* __AMP__ */
1646 }
1647
1648 #endif
1649
1650 pset_cluster_type_t
1651 recommended_pset_type(thread_t thread)
1652 {
1653 #if CONFIG_THREAD_GROUPS && __AMP__
1654 if (thread == THREAD_NULL) {
1655 return PSET_AMP_E;
1656 }
1657
1658 if (thread->sched_flags & TH_SFLAG_ECORE_ONLY) {
1659 return PSET_AMP_E;
1660 } else if (thread->sched_flags & TH_SFLAG_PCORE_ONLY) {
1661 return PSET_AMP_P;
1662 }
1663
1664 if (thread->base_pri <= MAXPRI_THROTTLE) {
1665 if (os_atomic_load(&sched_perfctl_policy_bg, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
1666 return PSET_AMP_E;
1667 }
1668 } else if (thread->base_pri <= BASEPRI_UTILITY) {
1669 if (os_atomic_load(&sched_perfctl_policy_util, relaxed) != SCHED_PERFCTL_POLICY_FOLLOW_GROUP) {
1670 return PSET_AMP_E;
1671 }
1672 }
1673
1674 #if DEVELOPMENT || DEBUG
1675 extern bool system_ecore_only;
1676 extern processor_set_t pcore_set;
1677 if (system_ecore_only) {
1678 if (thread->task->pset_hint == pcore_set) {
1679 return PSET_AMP_P;
1680 }
1681 return PSET_AMP_E;
1682 }
1683 #endif
1684
1685 struct thread_group *tg = thread_group_get(thread);
1686 cluster_type_t recommendation = thread_group_recommendation(tg);
1687 switch (recommendation) {
1688 case CLUSTER_TYPE_SMP:
1689 default:
1690 if (thread->task == kernel_task) {
1691 return PSET_AMP_E;
1692 }
1693 return PSET_AMP_P;
1694 case CLUSTER_TYPE_E:
1695 return PSET_AMP_E;
1696 case CLUSTER_TYPE_P:
1697 return PSET_AMP_P;
1698 }
1699 #else
1700 (void)thread;
1701 return PSET_SMP;
1702 #endif
1703 }
1704
1705 #if CONFIG_THREAD_GROUPS && __AMP__
1706
1707 void
1708 sched_perfcontrol_inherit_recommendation_from_tg(perfcontrol_class_t perfctl_class, boolean_t inherit)
1709 {
1710 sched_perfctl_class_policy_t sched_policy = inherit ? SCHED_PERFCTL_POLICY_FOLLOW_GROUP : SCHED_PERFCTL_POLICY_RESTRICT_E;
1711
1712 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_PERFCTL_POLICY_CHANGE) | DBG_FUNC_NONE, perfctl_class, sched_policy, 0, 0);
1713
1714 switch (perfctl_class) {
1715 case PERFCONTROL_CLASS_UTILITY:
1716 os_atomic_store(&sched_perfctl_policy_util, sched_policy, relaxed);
1717 break;
1718 case PERFCONTROL_CLASS_BACKGROUND:
1719 os_atomic_store(&sched_perfctl_policy_bg, sched_policy, relaxed);
1720 break;
1721 default:
1722 panic("perfctl_class invalid");
1723 break;
1724 }
1725 }
1726
1727 #elif defined(__arm64__)
1728
1729 /* Define a stub routine since this symbol is exported on all arm64 platforms */
1730 void
1731 sched_perfcontrol_inherit_recommendation_from_tg(__unused perfcontrol_class_t perfctl_class, __unused boolean_t inherit)
1732 {
1733 }
1734
1735 #endif /* defined(__arm64__) */