/*
 * Copyright (c) 2016-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <kern/kern_types.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/thread_group.h>
#include <kern/zalloc.h>
#include <kern/task.h>
#include <kern/machine.h>
#include <kern/coalition.h>
#include <sys/errno.h>
#include <kern/queue.h>
#include <kern/locks.h>
#include <kern/sched_clutch.h>

#if CONFIG_THREAD_GROUPS

#define CACHELINE_SIZE (1 << MMU_CLINE)

struct thread_group {
	uint64_t tg_id;
	char tg_name[THREAD_GROUP_MAXNAME];
	struct os_refcnt tg_refcount;
	uint32_t tg_flags;
	cluster_type_t tg_recommendation;
	queue_chain_t tg_queue_chain;
#if CONFIG_SCHED_CLUTCH
	struct sched_clutch tg_sched_clutch;
#endif /* CONFIG_SCHED_CLUTCH */
	// 16 bytes of padding here
	uint8_t tg_machine_data[] __attribute__((aligned(CACHELINE_SIZE)));
} __attribute__((aligned(8)));

static SECURITY_READ_ONLY_LATE(zone_t) tg_zone;
static uint32_t tg_count;
static queue_head_t tg_queue;
static LCK_GRP_DECLARE(tg_lck_grp, "thread_group");
static LCK_MTX_DECLARE(tg_lock, &tg_lck_grp);
static LCK_SPIN_DECLARE(tg_flags_update_lock, &tg_lck_grp);

static uint64_t tg_next_id = 0;
static uint32_t tg_size;
static uint32_t tg_machine_data_size;
static struct thread_group *tg_system;
static struct thread_group *tg_background;
static struct thread_group *tg_adaptive;
static struct thread_group *tg_vm;
static struct thread_group *tg_io_storage;
static struct thread_group *tg_perf_controller;
int tg_set_by_bankvoucher;

static bool thread_group_retain_try(struct thread_group *tg);

/*
 * Initialize thread groups at boot
 */
void
thread_group_init(void)
{
	// Get thread group structure extension from EDT or boot-args (which can override EDT)
	if (!PE_parse_boot_argn("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
		if (!PE_get_default("kern.thread_group_extra_bytes", &tg_machine_data_size, sizeof(tg_machine_data_size))) {
			tg_machine_data_size = 8;
		}
	}

	// Check if thread group can be set by voucher adoption from EDT or boot-args (which can override EDT)
	if (!PE_parse_boot_argn("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
		if (!PE_get_default("kern.thread_group_set_by_bankvoucher", &tg_set_by_bankvoucher, sizeof(tg_set_by_bankvoucher))) {
			tg_set_by_bankvoucher = 1;
		}
	}

	tg_size = sizeof(struct thread_group) + tg_machine_data_size;
	if (tg_size % CACHELINE_SIZE) {
		tg_size += CACHELINE_SIZE - (tg_size % CACHELINE_SIZE);
	}
	tg_machine_data_size = tg_size - sizeof(struct thread_group);
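	/*
	 * Worked example (assuming a 64-byte cache line, i.e. MMU_CLINE == 6):
	 * with the default of 8 extra bytes, tg_size becomes
	 * sizeof(struct thread_group) + 8 rounded up to the next multiple of 64,
	 * and tg_machine_data_size is then recomputed so that the per-group
	 * machine data absorbs the rounding padding. A boot-arg such as
	 * kern.thread_group_extra_bytes=32 overrides the EDT default the same way.
	 */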
	// printf("tg_size=%d(%lu+%d)\n", tg_size, sizeof(struct thread_group), tg_machine_data_size);
	assert(offsetof(struct thread_group, tg_machine_data) % CACHELINE_SIZE == 0);
	tg_zone = zone_create("thread_groups", tg_size, ZC_NOENCRYPT | ZC_ALIGNMENT_REQUIRED);

	queue_head_init(tg_queue);
	tg_system = thread_group_create_and_retain();
	thread_group_set_name(tg_system, "system");
	tg_background = thread_group_create_and_retain();
	thread_group_set_name(tg_background, "background");
	tg_adaptive = thread_group_create_and_retain();
	thread_group_set_name(tg_adaptive, "adaptive");
	tg_vm = thread_group_create_and_retain();
	thread_group_set_name(tg_vm, "VM");
	tg_io_storage = thread_group_create_and_retain();
	thread_group_set_name(tg_io_storage, "io storage");
	tg_perf_controller = thread_group_create_and_retain();
	thread_group_set_name(tg_perf_controller, "perf_controller");

	/*
	 * If CLPC is disabled, it recommends SMP for all thread groups.
	 * In that mode, the scheduler restricts the kernel thread groups to
	 * the E-cluster while all other thread groups run on the P-cluster.
	 * To identify the kernel thread groups, mark them with the special
	 * flag THREAD_GROUP_FLAGS_SMP_RESTRICT, which is checked by
	 * recommended_pset_type().
	 */
	tg_system->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
	tg_vm->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
	tg_io_storage->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
	tg_perf_controller->tg_flags |= THREAD_GROUP_FLAGS_SMP_RESTRICT;
}

#if CONFIG_SCHED_CLUTCH
/*
 * sched_clutch_for_thread
 *
 * This routine provides a back link from the thread to the
 * sched_clutch it belongs to. This relationship is based on the
 * thread group membership of the thread. Since that membership is
 * changed from the thread context with the thread lock held, this
 * linkage should be looked at only with the thread lock held or
 * when the thread cannot be running (e.g. the thread is in the
 * runq and being removed as part of thread_select()).
 */
sched_clutch_t
sched_clutch_for_thread(thread_t thread)
{
	assert(thread->thread_group != NULL);
	return &(thread->thread_group->tg_sched_clutch);
}
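
/*
 * Illustrative access pattern (hypothetical caller, sketching the locking
 * rule above): the clutch back-link is only dereferenced with the thread
 * lock held.
 *
 *	spl_t s = splsched();
 *	thread_lock(thread);
 *	sched_clutch_t clutch = sched_clutch_for_thread(thread);
 *	... use clutch while the thread lock is held ...
 *	thread_unlock(thread);
 *	splx(s);
 */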

sched_clutch_t
sched_clutch_for_thread_group(struct thread_group *thread_group)
{
	return &(thread_group->tg_sched_clutch);
}

/*
 * Translate the TG flags to a priority boost for the sched_clutch.
 * This priority boost will apply to the entire clutch represented
 * by the thread group.
 */
static void
sched_clutch_update_tg_flags(sched_clutch_t clutch, uint8_t flags)
{
	sched_clutch_tg_priority_t sc_tg_pri = 0;
	if (flags & THREAD_GROUP_FLAGS_UI_APP) {
		sc_tg_pri = SCHED_CLUTCH_TG_PRI_HIGH;
	} else if (flags & THREAD_GROUP_FLAGS_EFFICIENT) {
		sc_tg_pri = SCHED_CLUTCH_TG_PRI_LOW;
	} else {
		sc_tg_pri = SCHED_CLUTCH_TG_PRI_MED;
	}
	os_atomic_store(&clutch->sc_tg_priority, sc_tg_pri, relaxed);
}

#endif /* CONFIG_SCHED_CLUTCH */

/*
 * Use a spinlock to protect all thread group flag updates.
 * The lock should not have heavy contention since these flag updates should
 * be infrequent. If this lock has contention issues, it should be changed to
 * a per thread-group lock.
 *
 * The lock protects the flags field in the thread_group structure. It is also
 * held while doing callouts to CLPC to reflect these flag changes.
 */

void
thread_group_flags_update_lock(void)
{
	lck_spin_lock_grp(&tg_flags_update_lock, &tg_lck_grp);
}

void
thread_group_flags_update_unlock(void)
{
	lck_spin_unlock(&tg_flags_update_lock);
}

/*
 * Inform platform code about already existing thread groups
 * or ask it to free state for all thread groups
 */
void
thread_group_resync(boolean_t create)
{
	struct thread_group *tg;

	lck_mtx_lock(&tg_lock);
	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
		if (create) {
			machine_thread_group_init(tg);
		} else {
			machine_thread_group_deinit(tg);
		}
	}
	lck_mtx_unlock(&tg_lock);
}

/*
 * Create new thread group and add new reference to it.
 */
struct thread_group *
thread_group_create_and_retain(void)
{
	struct thread_group *tg;

	tg = (struct thread_group *)zalloc(tg_zone);
	if (tg == NULL) {
		panic("thread group zone over commit");
	}
	assert((uintptr_t)tg % CACHELINE_SIZE == 0);
	bzero(tg, sizeof(struct thread_group));

#if CONFIG_SCHED_CLUTCH
	/*
	 * The clutch scheduler maintains a bunch of runqs per thread group. For
	 * each thread group it maintains a sched_clutch structure. The lifetime
	 * of that structure is tied directly to the lifetime of the thread group.
	 */
	sched_clutch_init_with_thread_group(&(tg->tg_sched_clutch), tg);

	/*
	 * Since the thread group flags are used to determine any priority promotions
	 * for the threads in the thread group, initialize them to 0.
	 */
	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), 0);

#endif /* CONFIG_SCHED_CLUTCH */

	lck_mtx_lock(&tg_lock);
	tg->tg_id = tg_next_id++;
	tg->tg_recommendation = CLUSTER_TYPE_SMP; // no recommendation yet
	os_ref_init(&tg->tg_refcount, NULL);
	tg_count++;
	enqueue_tail(&tg_queue, &tg->tg_queue_chain);
	lck_mtx_unlock(&tg_lock);

	// call machine layer init before this thread group becomes visible
	machine_thread_group_init(tg);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NEW), tg->tg_id);

	return tg;
}
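
/*
 * Typical lifecycle (hypothetical caller sketch, using only the routines in
 * this file):
 *
 *	struct thread_group *tg = thread_group_create_and_retain();
 *	thread_group_set_name(tg, "example");
 *	... publish tg; any other holder takes its own reference with
 *	    thread_group_retain() or thread_group_retain_try() ...
 *	thread_group_release(tg);	// drop the creation reference
 */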

/*
 * Point newly created thread to its home thread group
 */
void
thread_group_init_thread(thread_t t, task_t task)
{
	struct thread_group *tg = task_coalition_get_thread_group(task);
	t->thread_group = tg;
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
	    THREAD_GROUP_INVALID, tg->tg_id, (uintptr_t)thread_tid(t));
}

/*
 * Set thread group name
 */
void
thread_group_set_name(__unused struct thread_group *tg, __unused const char *name)
{
	if (name == NULL) {
		return;
	}
	if (!thread_group_retain_try(tg)) {
		return;
	}
	if (tg->tg_name[0] == '\0') {
		strncpy(&tg->tg_name[0], name, THREAD_GROUP_MAXNAME);
#if defined(__LP64__)
		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
		    tg->tg_id,
		    *(uint64_t*)(void*)&tg->tg_name[0],
		    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)]
		    );
#else /* defined(__LP64__) */
		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME),
		    tg->tg_id,
		    *(uint32_t*)(void*)&tg->tg_name[0],
		    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)]
		    );
#endif /* defined(__LP64__) */
	}
	thread_group_release(tg);
}

void
thread_group_set_flags(struct thread_group *tg, uint64_t flags)
{
	thread_group_flags_update_lock();
	thread_group_set_flags_locked(tg, flags);
	thread_group_flags_update_unlock();
}

void
thread_group_clear_flags(struct thread_group *tg, uint64_t flags)
{
	thread_group_flags_update_lock();
	thread_group_clear_flags_locked(tg, flags);
	thread_group_flags_update_unlock();
}

/*
 * Set thread group flags and perform related actions.
 * The tg_flags_update_lock should be held.
 * Currently supported flags are:
 * - THREAD_GROUP_FLAGS_EFFICIENT
 * - THREAD_GROUP_FLAGS_UI_APP
 */

void
thread_group_set_flags_locked(struct thread_group *tg, uint64_t flags)
{
	if ((flags & THREAD_GROUP_FLAGS_VALID) != flags) {
		panic("thread_group_set_flags: Invalid flags %llu", flags);
	}

	if ((tg->tg_flags & flags) == flags) {
		return;
	}

	__kdebug_only uint64_t old_flags = tg->tg_flags;
	tg->tg_flags |= flags;
	machine_thread_group_flags_update(tg, tg->tg_flags);
#if CONFIG_SCHED_CLUTCH
	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), tg->tg_flags);
#endif /* CONFIG_SCHED_CLUTCH */
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
	    tg->tg_id, tg->tg_flags, old_flags);
}

/*
 * Clear thread group flags and perform related actions
 * The tg_flags_update_lock should be held.
 * Currently supported flags are:
 * - THREAD_GROUP_FLAGS_EFFICIENT
 * - THREAD_GROUP_FLAGS_UI_APP
 */

void
thread_group_clear_flags_locked(struct thread_group *tg, uint64_t flags)
{
	if ((flags & THREAD_GROUP_FLAGS_VALID) != flags) {
		panic("thread_group_clear_flags: Invalid flags %llu", flags);
	}

	if ((tg->tg_flags & flags) == 0) {
		return;
	}

	__kdebug_only uint64_t old_flags = tg->tg_flags;
	tg->tg_flags &= ~flags;
#if CONFIG_SCHED_CLUTCH
	sched_clutch_update_tg_flags(&(tg->tg_sched_clutch), tg->tg_flags);
#endif /* CONFIG_SCHED_CLUTCH */
	machine_thread_group_flags_update(tg, tg->tg_flags);
	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FLAGS),
	    tg->tg_id, tg->tg_flags, old_flags);
}


/*
 * Find thread group with specified name and add a new reference to it.
 */
struct thread_group *
thread_group_find_by_name_and_retain(char *name)
{
	struct thread_group *result = NULL;

	if (name == NULL) {
		return NULL;
	}

	if (strncmp("system", name, THREAD_GROUP_MAXNAME) == 0) {
		return thread_group_retain(tg_system);
	} else if (strncmp("background", name, THREAD_GROUP_MAXNAME) == 0) {
		return thread_group_retain(tg_background);
	} else if (strncmp("adaptive", name, THREAD_GROUP_MAXNAME) == 0) {
		return thread_group_retain(tg_adaptive);
	} else if (strncmp("perf_controller", name, THREAD_GROUP_MAXNAME) == 0) {
		return thread_group_retain(tg_perf_controller);
	}

	struct thread_group *tg;
	lck_mtx_lock(&tg_lock);
	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
		if (strncmp(tg->tg_name, name, THREAD_GROUP_MAXNAME) == 0 &&
		    thread_group_retain_try(tg)) {
			result = tg;
			break;
		}
	}
	lck_mtx_unlock(&tg_lock);
	return result;
}
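
/*
 * Lookup sketch (hypothetical caller): a successful find returns the group
 * with an extra reference that the caller owns and must drop.
 *
 *	struct thread_group *tg = thread_group_find_by_name_and_retain("adaptive");
 *	if (tg != NULL) {
 *		... use tg ...
 *		thread_group_release(tg);
 *	}
 */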

/*
 * Find thread group with specified ID and add new reference to it.
 */
struct thread_group *
thread_group_find_by_id_and_retain(uint64_t id)
{
	struct thread_group *tg = NULL;
	struct thread_group *result = NULL;

	switch (id) {
	case THREAD_GROUP_SYSTEM:
		result = tg_system;
		thread_group_retain(tg_system);
		break;
	case THREAD_GROUP_BACKGROUND:
		result = tg_background;
		thread_group_retain(tg_background);
		break;
	case THREAD_GROUP_ADAPTIVE:
		result = tg_adaptive;
		thread_group_retain(tg_adaptive);
		break;
	case THREAD_GROUP_VM:
		result = tg_vm;
		thread_group_retain(tg_vm);
		break;
	case THREAD_GROUP_IO_STORAGE:
		result = tg_io_storage;
		thread_group_retain(tg_io_storage);
		break;
	case THREAD_GROUP_PERF_CONTROLLER:
		result = tg_perf_controller;
		thread_group_retain(tg_perf_controller);
		break;
	default:
		lck_mtx_lock(&tg_lock);
		qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
			if (tg->tg_id == id && thread_group_retain_try(tg)) {
				result = tg;
				break;
			}
		}
		lck_mtx_unlock(&tg_lock);
	}
	return result;
}

/*
 * Add new reference to specified thread group
 */
struct thread_group *
thread_group_retain(struct thread_group *tg)
{
	os_ref_retain(&tg->tg_refcount);
	return tg;
}

/*
 * Similar to thread_group_retain, but fails for thread groups with a
 * zero reference count. Returns true if retained successfully.
 */
static bool
thread_group_retain_try(struct thread_group *tg)
{
	return os_ref_retain_try(&tg->tg_refcount);
}

/*
 * Drop a reference to specified thread group
 */
void
thread_group_release(struct thread_group *tg)
{
	if (os_ref_release(&tg->tg_refcount) == 0) {
		lck_mtx_lock(&tg_lock);
		tg_count--;
		remqueue(&tg->tg_queue_chain);
		lck_mtx_unlock(&tg_lock);
		static_assert(THREAD_GROUP_MAXNAME >= (sizeof(uint64_t) * 2), "thread group name is too short");
		static_assert(__alignof(struct thread_group) >= __alignof(uint64_t), "thread group name is not 8 bytes aligned");
#if defined(__LP64__)
		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
		    tg->tg_id,
		    *(uint64_t*)(void*)&tg->tg_name[0],
		    *(uint64_t*)(void*)&tg->tg_name[sizeof(uint64_t)]
		    );
#else /* defined(__LP64__) */
		KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_NAME_FREE),
		    tg->tg_id,
		    *(uint32_t*)(void*)&tg->tg_name[0],
		    *(uint32_t*)(void*)&tg->tg_name[sizeof(uint32_t)]
		    );
#endif /* defined(__LP64__) */
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_FREE), tg->tg_id);
#if CONFIG_SCHED_CLUTCH
		sched_clutch_destroy(&(tg->tg_sched_clutch));
#endif /* CONFIG_SCHED_CLUTCH */
		machine_thread_group_deinit(tg);
		zfree(tg_zone, tg);
	}
}

/*
 * Get thread's current thread group
 */
inline struct thread_group *
thread_group_get(thread_t t)
{
	return t->thread_group;
}

struct thread_group *
thread_group_get_home_group(thread_t t)
{
	return task_coalition_get_thread_group(t->task);
}

#if CONFIG_SCHED_AUTO_JOIN

/*
 * thread_set_thread_group_auto_join()
 *
 * Sets the thread group of a thread based on auto-join rules.
 *
 * Preconditions:
 * - Thread must not be part of a runq (freshly made runnable threads or terminating only)
 * - Thread must be locked by the caller already
 */
static void
thread_set_thread_group_auto_join(thread_t t, struct thread_group *tg, __unused struct thread_group *old_tg)
{
	assert(t->runq == PROCESSOR_NULL);
	t->thread_group = tg;

	/*
	 * If the thread group is being changed for the current thread, callout to
	 * CLPC to update the thread's information at that layer. This makes sure CLPC
	 * has consistent state when the current thread is going off-core.
	 */
	if (t == current_thread()) {
		uint64_t ctime = mach_approximate_time();
		uint64_t arg1, arg2;
		machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
		machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, PERFCONTROL_CALLOUT_WAKE_UNSAFE, t);
	}
}

#endif /* CONFIG_SCHED_AUTO_JOIN */

/*
 * thread_set_thread_group_explicit()
 *
 * Sets the thread group of a thread based on default non auto-join rules.
 *
 * Preconditions:
 * - Thread must be the current thread
 * - Caller must not have the thread locked
 * - Interrupts must be disabled
 */
static void
thread_set_thread_group_explicit(thread_t t, struct thread_group *tg, __unused struct thread_group *old_tg)
{
	assert(t == current_thread());
	/*
	 * In the clutch scheduler world, the runq membership of the thread
	 * is based on its thread group membership and its scheduling bucket.
	 * In order to synchronize with the priority (and therefore bucket)
	 * getting updated concurrently, it is important to perform the
	 * thread group change also under the thread lock.
	 */
	thread_lock(t);
	t->thread_group = tg;

#if CONFIG_SCHED_CLUTCH
	sched_clutch_t old_clutch = (old_tg) ? &(old_tg->tg_sched_clutch) : NULL;
	sched_clutch_t new_clutch = (tg) ? &(tg->tg_sched_clutch) : NULL;
	if (SCHED_CLUTCH_THREAD_ELIGIBLE(t)) {
		sched_clutch_thread_clutch_update(t, old_clutch, new_clutch);
	}
#endif /* CONFIG_SCHED_CLUTCH */

	thread_unlock(t);

	uint64_t ctime = mach_approximate_time();
	uint64_t arg1, arg2;
	machine_thread_going_on_core(t, thread_get_urgency(t, &arg1, &arg2), 0, 0, ctime);
	machine_switch_perfcontrol_state_update(THREAD_GROUP_UPDATE, ctime, 0, t);
}

/*
 * thread_set_thread_group()
 *
 * Overrides the current home thread group with an override group. However,
 * an adopted work interval overrides the override. Does not take a reference
 * on the group, so caller must guarantee group lifetime lasts as long as the
 * group is set.
 *
 * The thread group is set according to a hierarchy:
 *
 * 1) work interval specified group (explicit API)
 * 2) Auto-join thread group (wakeup tracking for special work intervals)
 * 3) bank voucher carried group (implicitly set)
 * 4) coalition default thread group (ambient)
 */
static void
thread_set_thread_group(thread_t t, struct thread_group *tg, bool auto_join)
{
	struct thread_group *home_tg = thread_group_get_home_group(t);
	struct thread_group *old_tg = NULL;

	if (tg == NULL) {
		/* when removing an override, revert to home group */
		tg = home_tg;
	}

	spl_t s = splsched();

	old_tg = t->thread_group;

	if (old_tg != tg) {
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_SET),
		    t->thread_group ? t->thread_group->tg_id : 0,
		    tg->tg_id, (uintptr_t)thread_tid(t), home_tg->tg_id);

		/*
		 * Based on whether this is a change due to auto-join, the join does
		 * different things and has different expectations.
		 */
		if (auto_join) {
#if CONFIG_SCHED_AUTO_JOIN
			/*
			 * set thread group with auto-join rules. This has the
			 * implicit assumption that the thread lock is already held.
			 * Also this could happen to any thread (current or thread
			 * being context switched).
			 */
			thread_set_thread_group_auto_join(t, tg, old_tg);
#else /* CONFIG_SCHED_AUTO_JOIN */
			panic("Auto-Join unsupported on this platform");
#endif /* CONFIG_SCHED_AUTO_JOIN */
		} else {
			/*
			 * set thread group with the explicit join rules. This has
			 * the implicit assumption that the thread is not locked. Also
			 * this would be done only to the current thread.
			 */
			thread_set_thread_group_explicit(t, tg, old_tg);
		}
	}

	splx(s);
}
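
/*
 * Example of the hierarchy above (hypothetical thread): a thread whose
 * coalition supplies home group H adopts a bank voucher carrying group B and
 * then a work interval with group W; it runs in W. Dropping the work interval
 * override via thread_set_work_interval_thread_group(t, NULL, false) falls
 * back to the voucher group B, and clearing the voucher override via
 * thread_group_set_bank(t, NULL) reverts the thread to its home group H.
 */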

void
thread_group_set_bank(thread_t t, struct thread_group *tg)
{
	/* work interval group overrides any bank override group */
	if (t->th_work_interval) {
		return;
	}

	/* boot arg disables groups in bank */
	if (tg_set_by_bankvoucher == FALSE) {
		return;
	}

	thread_set_thread_group(t, tg, false);
}

/*
 * thread_set_work_interval_thread_group()
 *
 * Sets the thread's group to the work interval thread group.
 * If auto_join == true, the thread group is being overridden through scheduler
 * auto-join policies.
 *
 * Preconditions for the auto-join case:
 * - t is not current_thread and t should be locked.
 * - t should not be running on a remote core; being in the middle of a context
 *   switch is a valid state for this.
 */
void
thread_set_work_interval_thread_group(thread_t t, struct thread_group *tg, bool auto_join)
{
	if (tg == NULL) {
		/*
		 * When removing a work interval override, fall back
		 * to the current voucher override.
		 *
		 * In the auto_join case, the thread is already locked by the caller, so
		 * it's unsafe to get the thread group from the current voucher (since
		 * that might require taking the task lock and ivac lock). However, the
		 * auto-join policy does not allow threads to switch thread groups based
		 * on voucher overrides.
		 *
		 * For the normal case, look up the thread group from the currently adopted
		 * voucher and use that as the fallback tg.
		 */

		if (auto_join == false) {
			tg = thread_get_current_voucher_thread_group(t);
		}
	}

	thread_set_thread_group(t, tg, auto_join);
}

inline cluster_type_t
thread_group_recommendation(struct thread_group *tg)
{
	if (tg == NULL) {
		return CLUSTER_TYPE_SMP;
	} else {
		return tg->tg_recommendation;
	}
}

inline uint64_t
thread_group_get_id(struct thread_group *tg)
{
	return tg->tg_id;
}

uint32_t
thread_group_count(void)
{
	return tg_count;
}

/*
 * Can only be called while tg cannot be destroyed
 */
inline const char*
thread_group_get_name(struct thread_group *tg)
{
	return tg->tg_name;
}

inline void *
thread_group_get_machine_data(struct thread_group *tg)
{
	return &tg->tg_machine_data;
}

inline uint32_t
thread_group_machine_data_size(void)
{
	return tg_machine_data_size;
}

kern_return_t
thread_group_iterate_stackshot(thread_group_iterate_fn_t callout, void *arg)
{
	struct thread_group *tg;
	int i = 0;
	qe_foreach_element(tg, &tg_queue, tg_queue_chain) {
		if (tg == NULL || !ml_validate_nofault((vm_offset_t)tg, sizeof(struct thread_group))) {
			return KERN_FAILURE;
		}
		callout(arg, i, tg);
		i++;
	}
	return KERN_SUCCESS;
}
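
/*
 * Callout sketch (hypothetical, matching the (arg, index, group) call shape
 * above; the walk is unlocked and each element is validated with
 * ml_validate_nofault() because this runs in the stackshot path):
 *
 *	static void
 *	stackshot_tg_callout(void *arg, int index, struct thread_group *tg)
 *	{
 *		... record thread_group_get_id(tg) and thread_group_get_name(tg)
 *		    into the buffer passed as arg ...
 *	}
 *
 *	kern_return_t kr = thread_group_iterate_stackshot(stackshot_tg_callout, buf);
 */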

void
thread_group_join_io_storage(void)
{
	struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_IO_STORAGE);
	assert(tg != NULL);
	thread_set_thread_group(current_thread(), tg, false);
}

void
thread_group_join_perf_controller(void)
{
	struct thread_group *tg = thread_group_find_by_id_and_retain(THREAD_GROUP_PERF_CONTROLLER);
	assert(tg != NULL);
	thread_set_thread_group(current_thread(), tg, false);
}

void
thread_group_vm_add(void)
{
	assert(tg_vm != NULL);
	thread_set_thread_group(current_thread(), thread_group_find_by_id_and_retain(THREAD_GROUP_VM), false);
}

uint32_t
thread_group_get_flags(struct thread_group *tg)
{
	return tg->tg_flags;
}

/*
 * Returns whether the thread group is restricted to the E-cluster when CLPC is
 * turned off.
 */
boolean_t
thread_group_smp_restricted(struct thread_group *tg)
{
	if (tg->tg_flags & THREAD_GROUP_FLAGS_SMP_RESTRICT) {
		return true;
	} else {
		return false;
	}
}

void
thread_group_update_recommendation(struct thread_group *tg, cluster_type_t new_recommendation)
{
	/*
	 * Since the tg->tg_recommendation field is read by CPUs trying to determine
	 * where a thread/thread group needs to be placed, it is important to use
	 * atomic operations to update the recommendation.
	 */
	os_atomic_store(&tg->tg_recommendation, new_recommendation, relaxed);
}

#if CONFIG_SCHED_EDGE

int sched_edge_restrict_ut = 1;
int sched_edge_restrict_bg = 1;

void
sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
{
	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
	/*
	 * CLUSTER_TYPE_SMP was used for some debugging support when CLPC dynamic control was turned off.
	 * In more recent implementations, CLPC simply recommends "P-spill" when dynamic control is turned off,
	 * so it should never recommend CLUSTER_TYPE_SMP for thread groups.
	 */
	assert(new_recommendation != CLUSTER_TYPE_SMP);
	/*
	 * The Edge scheduler expects preferred cluster recommendations for each QoS level within a TG. Until CLPC
	 * starts calling the new routine, fake out the call from the old CLPC interface.
	 */
	uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX] = {0};
	/*
	 * For all buckets higher than UT, apply the recommendation to the thread group bucket.
	 */
	for (sched_bucket_t bucket = TH_BUCKET_FIXPRI; bucket < TH_BUCKET_SHARE_UT; bucket++) {
		tg_bucket_preferred_cluster[bucket] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
	}
	/* For UT & BG QoS, set the recommendation only if they haven't been restricted via sysctls */
	if (!sched_edge_restrict_ut) {
		tg_bucket_preferred_cluster[TH_BUCKET_SHARE_UT] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
	}
	if (!sched_edge_restrict_bg) {
		tg_bucket_preferred_cluster[TH_BUCKET_SHARE_BG] = (new_recommendation == pset_type_for_id(0)) ? 0 : 1;
	}
	sched_perfcontrol_preferred_cluster_options_t options = 0;
	if (new_recommendation == CLUSTER_TYPE_P) {
		options |= SCHED_PERFCONTROL_PREFERRED_CLUSTER_MIGRATE_RUNNING;
	}
	sched_edge_tg_preferred_cluster_change(tg, tg_bucket_preferred_cluster, options);
}

void
sched_perfcontrol_edge_matrix_get(sched_clutch_edge *edge_matrix, bool *edge_request_bitmap, uint64_t flags, uint64_t matrix_order)
{
	sched_edge_matrix_get(edge_matrix, edge_request_bitmap, flags, matrix_order);
}

void
sched_perfcontrol_edge_matrix_set(sched_clutch_edge *edge_matrix, bool *edge_changes_bitmap, uint64_t flags, uint64_t matrix_order)
{
	sched_edge_matrix_set(edge_matrix, edge_changes_bitmap, flags, matrix_order);
}

void
sched_perfcontrol_thread_group_preferred_clusters_set(void *machine_data, uint32_t tg_preferred_cluster,
    uint32_t overrides[PERFCONTROL_CLASS_MAX], sched_perfcontrol_preferred_cluster_options_t options)
{
	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
	uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX] = {
		[TH_BUCKET_FIXPRI]   = (overrides[PERFCONTROL_CLASS_ABOVEUI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_ABOVEUI] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_FG] = (overrides[PERFCONTROL_CLASS_UI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UI] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_IN] = (overrides[PERFCONTROL_CLASS_UI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UI] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_DF] = (overrides[PERFCONTROL_CLASS_NONUI] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_NONUI] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_UT] = (overrides[PERFCONTROL_CLASS_UTILITY] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_UTILITY] : tg_preferred_cluster,
		[TH_BUCKET_SHARE_BG] = (overrides[PERFCONTROL_CLASS_BACKGROUND] != SCHED_PERFCONTROL_PREFERRED_CLUSTER_OVERRIDE_NONE) ? overrides[PERFCONTROL_CLASS_BACKGROUND] : tg_preferred_cluster,
	};
	sched_edge_tg_preferred_cluster_change(tg, tg_bucket_preferred_cluster, options);
}

#else /* CONFIG_SCHED_EDGE */

void
sched_perfcontrol_thread_group_recommend(__unused void *machine_data, __unused cluster_type_t new_recommendation)
{
	struct thread_group *tg = (struct thread_group *)((uintptr_t)machine_data - offsetof(struct thread_group, tg_machine_data));
	SCHED(thread_group_recommendation_change)(tg, new_recommendation);
}

void
sched_perfcontrol_edge_matrix_get(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_request_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
{
}

void
sched_perfcontrol_edge_matrix_set(__unused sched_clutch_edge *edge_matrix, __unused bool *edge_changes_bitmap, __unused uint64_t flags, __unused uint64_t matrix_order)
{
}

void
sched_perfcontrol_thread_group_preferred_clusters_set(__unused void *machine_data, __unused uint32_t tg_preferred_cluster,
    __unused uint32_t overrides[PERFCONTROL_CLASS_MAX], __unused sched_perfcontrol_preferred_cluster_options_t options)
{
}

#endif /* CONFIG_SCHED_EDGE */

#endif /* CONFIG_THREAD_GROUPS */