[apple/xnu.git] / osfmk / kern / thread.c (release xnu-7195.101.1)
1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: kern/thread.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub
61 * Date: 1986
62 *
63 * Thread management primitives implementation.
64 */
65 /*
66 * Copyright (c) 1993 The University of Utah and
67 * the Computer Systems Laboratory (CSL). All rights reserved.
68 *
69 * Permission to use, copy, modify and distribute this software and its
70 * documentation is hereby granted, provided that both the copyright
71 * notice and this permission notice appear in all copies of the
72 * software, derivative works or modified versions, and any portions
73 * thereof, and that both notices appear in supporting documentation.
74 *
75 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
76 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
77 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
78 *
79 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
80 * improvements that they make and grant CSL redistribution rights.
81 *
82 */
83
84 #include <mach/mach_types.h>
85 #include <mach/boolean.h>
86 #include <mach/policy.h>
87 #include <mach/thread_info.h>
88 #include <mach/thread_special_ports.h>
89 #include <mach/thread_status.h>
90 #include <mach/time_value.h>
91 #include <mach/vm_param.h>
92
93 #include <machine/thread.h>
94 #include <machine/pal_routines.h>
95 #include <machine/limits.h>
96
97 #include <kern/kern_types.h>
98 #include <kern/kalloc.h>
99 #include <kern/cpu_data.h>
100 #include <kern/extmod_statistics.h>
101 #include <kern/ipc_mig.h>
102 #include <kern/ipc_tt.h>
103 #include <kern/mach_param.h>
104 #include <kern/machine.h>
105 #include <kern/misc_protos.h>
106 #include <kern/processor.h>
107 #include <kern/queue.h>
108 #include <kern/sched.h>
109 #include <kern/sched_prim.h>
110 #include <kern/sync_lock.h>
111 #include <kern/syscall_subr.h>
112 #include <kern/task.h>
113 #include <kern/thread.h>
114 #include <kern/thread_group.h>
115 #include <kern/coalition.h>
116 #include <kern/host.h>
117 #include <kern/zalloc.h>
118 #include <kern/assert.h>
119 #include <kern/exc_resource.h>
120 #include <kern/exc_guard.h>
121 #include <kern/telemetry.h>
122 #include <kern/policy_internal.h>
123 #include <kern/turnstile.h>
124 #include <kern/sched_clutch.h>
125
126 #include <corpses/task_corpse.h>
127 #if KPC
128 #include <kern/kpc.h>
129 #endif
130
131 #if MONOTONIC
132 #include <kern/monotonic.h>
133 #include <machine/monotonic.h>
134 #endif /* MONOTONIC */
135
136 #include <ipc/ipc_kmsg.h>
137 #include <ipc/ipc_port.h>
138 #include <bank/bank_types.h>
139
140 #include <vm/vm_kern.h>
141 #include <vm/vm_pageout.h>
142
143 #include <sys/kdebug.h>
144 #include <sys/bsdtask_info.h>
145 #include <mach/sdt.h>
146 #include <san/kasan.h>
147 #if CONFIG_KSANCOV
148 #include <san/ksancov.h>
149 #endif
150
151 #include <stdatomic.h>
152
153 #if defined(HAS_APPLE_PAC)
154 #include <ptrauth.h>
155 #include <arm64/proc_reg.h>
156 #endif /* defined(HAS_APPLE_PAC) */
157
158 /*
159 * Exported interfaces
160 */
161 #include <mach/task_server.h>
162 #include <mach/thread_act_server.h>
163 #include <mach/mach_host_server.h>
164 #include <mach/host_priv_server.h>
165 #include <mach/mach_voucher_server.h>
166 #include <kern/policy_internal.h>
167
168 #if CONFIG_MACF
169 #include <security/mac_mach_internal.h>
170 #endif
171
172 LCK_GRP_DECLARE(thread_lck_grp, "thread");
173
174 ZONE_DECLARE(thread_zone, "threads", sizeof(struct thread), ZC_ZFREE_CLEARMEM);
175
176 ZONE_DECLARE(thread_qos_override_zone, "thread qos override",
177 sizeof(struct thread_qos_override), ZC_NOENCRYPT);
178
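/*
 * Deferred-work queues drained by kernel daemon threads (set up in
 * thread_daemon_init() below): stack allocation retries, reaping of
 * terminated threads, deferred final thread deallocation, and
 * EXC_{RESOURCE,GUARD} corpse notification delivery, respectively.
 */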
179 static struct mpsc_daemon_queue thread_stack_queue;
180 static struct mpsc_daemon_queue thread_terminate_queue;
181 static struct mpsc_daemon_queue thread_deallocate_queue;
182 static struct mpsc_daemon_queue thread_exception_queue;
183
184 decl_simple_lock_data(static, crashed_threads_lock);
185 static queue_head_t crashed_threads_queue;
186
187 struct thread_exception_elt {
188 struct mpsc_queue_chain link;
189 exception_type_t exception_type;
190 task_t exception_task;
191 thread_t exception_thread;
192 };
193
194 static SECURITY_READ_ONLY_LATE(struct thread) thread_template = {
195 #if MACH_ASSERT
196 .thread_magic = THREAD_MAGIC,
197 #endif /* MACH_ASSERT */
198 .wait_result = THREAD_WAITING,
199 .options = THREAD_ABORTSAFE,
200 .state = TH_WAIT | TH_UNINT,
201 .th_sched_bucket = TH_BUCKET_RUN,
202 .base_pri = BASEPRI_DEFAULT,
203 .realtime.deadline = UINT64_MAX,
204 .last_made_runnable_time = THREAD_NOT_RUNNABLE,
205 .last_basepri_change_time = THREAD_NOT_RUNNABLE,
206 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
207 .pri_shift = INT8_MAX,
208 #endif
209 /* timers are initialized in thread_bootstrap */
210 };
211
212 static struct thread init_thread;
213 static void thread_deallocate_enqueue(thread_t thread);
214 static void thread_deallocate_complete(thread_t thread);
215
216 #ifdef MACH_BSD
217 extern void proc_exit(void *);
218 extern mach_exception_data_type_t proc_encode_exit_exception_code(void *);
219 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
220 extern uint64_t get_return_to_kernel_offset_from_proc(void *p);
221 extern int proc_selfpid(void);
222 extern void proc_name(int, char*, int);
223 extern char * proc_name_address(void *p);
224 #endif /* MACH_BSD */
225
226 extern int disable_exc_resource;
227 extern int audio_active;
228 extern int debug_task;
229 int thread_max = CONFIG_THREAD_MAX; /* Max number of threads */
230 int task_threadmax = CONFIG_THREAD_MAX;
231
232 static uint64_t thread_unique_id = 100;
233
234 struct _thread_ledger_indices thread_ledgers = { .cpu_time = -1 };
235 static ledger_template_t thread_ledger_template = NULL;
236 static void init_thread_ledgers(void);
237
238 #if CONFIG_JETSAM
239 void jetsam_on_ledger_cpulimit_exceeded(void);
240 #endif
241
242 extern int task_thread_soft_limit;
243 extern int exc_via_corpse_forking;
244
245 #if DEVELOPMENT || DEBUG
246 extern int exc_resource_threads_enabled;
247 #endif /* DEVELOPMENT || DEBUG */
248
249 /*
250 * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry.
251 *
252  * (i.e. when any thread's CPU consumption exceeds 70% of the limit, start taking user
253  * stacktraces, a.k.a. micro-stackshots)
254 */
255 #define CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70
256
257 /* Percentage. Level at which we start gathering telemetry. */
258 static TUNABLE(uint8_t, cpumon_ustackshots_trigger_pct,
259 "cpumon_ustackshots_trigger_pct", CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT);
260 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void);
261 #if DEVELOPMENT || DEBUG
262 void __attribute__((noinline)) SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(task_t, int);
263 #endif /* DEVELOPMENT || DEBUG */
264
265 /*
266 * The smallest interval over which we support limiting CPU consumption is 1ms
267 */
268 #define MINIMUM_CPULIMIT_INTERVAL_MS 1
269
270 os_refgrp_decl(static, thread_refgrp, "thread", NULL);
271
272 static inline void
273 init_thread_from_template(thread_t thread)
274 {
275 /*
276 * In general, struct thread isn't trivially-copyable, since it may
277 * contain pointers to thread-specific state. This may be enforced at
278 * compile time on architectures that store authed + diversified
279 * pointers in machine_thread.
280 *
281 * In this specific case, where we're initializing a new thread from a
282 * thread_template, we know all diversified pointers are NULL; these are
283 * safe to bitwise copy.
284 */
285 #pragma clang diagnostic push
286 #pragma clang diagnostic ignored "-Wnontrivial-memaccess"
287 memcpy(thread, &thread_template, sizeof(*thread));
288 #pragma clang diagnostic pop
289 }
290
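/*
 * thread_bootstrap:
 *
 * Initialize the static thread template and return the statically
 * allocated init_thread, which serves as the very first current_thread().
 * The first call to thread_create_internal() adopts this thread as
 * `first_thread' instead of allocating a new one from thread_zone.
 */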
291 thread_t
292 thread_bootstrap(void)
293 {
294 /*
295 * Fill in a template thread for fast initialization.
296 */
297 timer_init(&thread_template.user_timer);
298 timer_init(&thread_template.system_timer);
299 timer_init(&thread_template.ptime);
300 timer_init(&thread_template.runnable_timer);
301
302 init_thread_from_template(&init_thread);
303 /* fiddle with init thread to skip asserts in set_sched_pri */
304 init_thread.sched_pri = MAXPRI_KERNEL;
305 #if DEBUG || DEVELOPMENT
306 queue_init(&init_thread.t_temp_alloc_list);
307 #endif /* DEBUG || DEVELOPMENT */
308
309 return &init_thread;
310 }
311
312 void
313 thread_machine_init_template(void)
314 {
315 machine_thread_template_init(&thread_template);
316 }
317
318 void
319 thread_init(void)
320 {
321 stack_init();
322
323 thread_policy_init();
324
325 /*
326 * Initialize any machine-dependent
327 * per-thread structures necessary.
328 */
329 machine_thread_init();
330
331 init_thread_ledgers();
332 }
333
334 boolean_t
335 thread_is_active(thread_t thread)
336 {
337 return thread->active;
338 }
339
340 void
341 thread_corpse_continue(void)
342 {
343 thread_t thread = current_thread();
344
345 thread_terminate_internal(thread, TH_TERMINATE_OPTION_NONE);
346
347 /*
348 * Handle the thread termination directly
349 * here instead of returning to userspace.
350 */
351 assert(thread->active == FALSE);
352 thread_ast_clear(thread, AST_APC);
353 thread_apc_ast(thread);
354
355 panic("thread_corpse_continue");
356 /*NOTREACHED*/
357 }
358
359 __dead2
360 static void
361 thread_terminate_continue(void)
362 {
363 panic("thread_terminate_continue");
364 /*NOTREACHED*/
365 }
366
367 /*
368 * thread_terminate_self:
369 */
370 void
371 thread_terminate_self(void)
372 {
373 thread_t thread = current_thread();
374 task_t task;
375 int threadcnt;
376
377 if (thread->t_temp_alloc_count) {
378 kheap_temp_leak_panic(thread);
379 }
380
381 pal_thread_terminate_self(thread);
382
383 DTRACE_PROC(lwp__exit);
384
385 thread_mtx_lock(thread);
386
387 ipc_thread_disable(thread);
388
389 thread_mtx_unlock(thread);
390
391 thread_sched_call(thread, NULL);
392
393 spl_t s = splsched();
394 thread_lock(thread);
395
396 thread_depress_abort_locked(thread);
397
398 thread_unlock(thread);
399 splx(s);
400
401 #if CONFIG_TASKWATCH
402 thead_remove_taskwatch(thread);
403 #endif /* CONFIG_TASKWATCH */
404
405 work_interval_thread_terminate(thread);
406
407 thread_mtx_lock(thread);
408
409 thread_policy_reset(thread);
410
411 thread_mtx_unlock(thread);
412
413 assert(thread->th_work_interval == NULL);
414
415 bank_swap_thread_bank_ledger(thread, NULL);
416
417 if (kdebug_enable && bsd_hasthreadname(thread->uthread)) {
418 char threadname[MAXTHREADNAMESIZE];
419 bsd_getthreadname(thread->uthread, threadname);
420 kernel_debug_string_simple(TRACE_STRING_THREADNAME_PREV, threadname);
421 }
422
423 task = thread->task;
424 uthread_cleanup(task, thread->uthread, task->bsd_info);
425
426 if (kdebug_enable && task->bsd_info && !task_is_exec_copy(task)) {
427 /* trace out pid before we sign off */
428 long dbg_arg1 = 0;
429 long dbg_arg2 = 0;
430
431 kdbg_trace_data(thread->task->bsd_info, &dbg_arg1, &dbg_arg2);
432 #if MONOTONIC
433 if (kdebug_debugid_enabled(DBG_MT_INSTRS_CYCLES_THR_EXIT)) {
434 uint64_t counts[MT_CORE_NFIXED];
435 uint64_t thread_user_time;
436 uint64_t thread_system_time;
437 thread_user_time = timer_grab(&thread->user_timer);
438 thread_system_time = timer_grab(&thread->system_timer);
439 mt_fixed_thread_counts(thread, counts);
440 KDBG_RELEASE(DBG_MT_INSTRS_CYCLES_THR_EXIT,
441 #ifdef MT_CORE_INSTRS
442 counts[MT_CORE_INSTRS],
443 #else /* defined(MT_CORE_INSTRS) */
444 0,
445 #endif/* !defined(MT_CORE_INSTRS) */
446 counts[MT_CORE_CYCLES],
447 thread_system_time, thread_user_time);
448 }
449 #endif/* MONOTONIC */
450 KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE_PID, dbg_arg1, dbg_arg2);
451 }
452
453 /*
454 * After this subtraction, this thread should never access
455 * task->bsd_info unless it got 0 back from the os_atomic_dec. It
456 * could be racing with other threads to be the last thread in the
457 * process, and the last thread in the process will tear down the proc
458 * structure and zero-out task->bsd_info.
459 */
460 threadcnt = os_atomic_dec(&task->active_thread_count, relaxed);
461
462 /*
463 * If we are the last thread to terminate and the task is
464 * associated with a BSD process, perform BSD process exit.
465 */
466 if (threadcnt == 0 && task->bsd_info != NULL && !task_is_exec_copy(task)) {
467 mach_exception_data_type_t subcode = 0;
468 if (kdebug_enable) {
469 /* since we're the last thread in this process, trace out the command name too */
470 long args[4] = {};
471 kdbg_trace_string(thread->task->bsd_info, &args[0], &args[1], &args[2], &args[3]);
472 #if MONOTONIC
473 if (kdebug_debugid_enabled(DBG_MT_INSTRS_CYCLES_PROC_EXIT)) {
474 uint64_t counts[MT_CORE_NFIXED];
475 uint64_t task_user_time;
476 uint64_t task_system_time;
477 mt_fixed_task_counts(task, counts);
478 /* since the thread time is not yet added to the task */
479 task_user_time = task->total_user_time + timer_grab(&thread->user_timer);
480 task_system_time = task->total_system_time + timer_grab(&thread->system_timer);
481 KDBG_RELEASE((DBG_MT_INSTRS_CYCLES_PROC_EXIT),
482 #ifdef MT_CORE_INSTRS
483 counts[MT_CORE_INSTRS],
484 #else /* defined(MT_CORE_INSTRS) */
485 0,
486 #endif/* !defined(MT_CORE_INSTRS) */
487 counts[MT_CORE_CYCLES],
488 task_system_time, task_user_time);
489 }
490 #endif/* MONOTONIC */
491 KDBG_RELEASE(TRACE_STRING_PROC_EXIT, args[0], args[1], args[2], args[3]);
492 }
493
494 /* Get the exit reason before proc_exit */
495 subcode = proc_encode_exit_exception_code(task->bsd_info);
496 proc_exit(task->bsd_info);
497 /*
498  * If the task has crash info, deliver the crash
499  * notification now, since this is the
500  * last thread for this task.
501 */
502 if (task->corpse_info) {
503 task_deliver_crash_notification(task, current_thread(), EXC_RESOURCE, subcode);
504 }
505 }
506
507 if (threadcnt == 0) {
508 task_lock(task);
509 if (task_is_a_corpse_fork(task)) {
510 thread_wakeup((event_t)&task->active_thread_count);
511 }
512 task_unlock(task);
513 }
514
515 uthread_cred_free(thread->uthread);
516
517 s = splsched();
518 thread_lock(thread);
519
520 /*
521 * Ensure that the depress timer is no longer enqueued,
522 * so the timer (stored in the thread) can be safely deallocated
523 *
524 * TODO: build timer_call_cancel_wait
525 */
526
527 assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) == 0);
528
529 uint32_t delay_us = 1;
530
531 while (thread->depress_timer_active > 0) {
532 thread_unlock(thread);
533 splx(s);
534
535 delay(delay_us++);
536
537 if (delay_us > USEC_PER_SEC) {
538 panic("depress timer failed to inactivate!"
539 "thread: %p depress_timer_active: %d",
540 thread, thread->depress_timer_active);
541 }
542
543 s = splsched();
544 thread_lock(thread);
545 }
546
547 /*
548 * Cancel wait timer, and wait for
549 * concurrent expirations.
550 */
551 if (thread->wait_timer_is_set) {
552 thread->wait_timer_is_set = FALSE;
553
554 if (timer_call_cancel(&thread->wait_timer)) {
555 thread->wait_timer_active--;
556 }
557 }
558
559 delay_us = 1;
560
561 while (thread->wait_timer_active > 0) {
562 thread_unlock(thread);
563 splx(s);
564
565 delay(delay_us++);
566
567 if (delay_us > USEC_PER_SEC) {
568 panic("wait timer failed to inactivate!"
569 "thread: %p wait_timer_active: %d",
570 thread, thread->wait_timer_active);
571 }
572
573 s = splsched();
574 thread_lock(thread);
575 }
576
577 /*
578 * If there is a reserved stack, release it.
579 */
580 if (thread->reserved_stack != 0) {
581 stack_free_reserved(thread);
582 thread->reserved_stack = 0;
583 }
584
585 /*
586 * Mark thread as terminating, and block.
587 */
588 thread->state |= TH_TERMINATE;
589 thread_mark_wait_locked(thread, THREAD_UNINT);
590
591 assert((thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED) == 0);
592 assert((thread->sched_flags & TH_SFLAG_RW_PROMOTED) == 0);
593 assert((thread->sched_flags & TH_SFLAG_EXEC_PROMOTED) == 0);
594 assert((thread->sched_flags & TH_SFLAG_PROMOTED) == 0);
595 assert((thread->sched_flags & TH_SFLAG_THREAD_GROUP_AUTO_JOIN) == 0);
596 assert(thread->th_work_interval_flags == TH_WORK_INTERVAL_FLAGS_NONE);
597 assert(thread->kern_promotion_schedpri == 0);
598 assert(thread->waiting_for_mutex == NULL);
599 assert(thread->rwlock_count == 0);
600 assert(thread->handoff_thread == THREAD_NULL);
601 assert(thread->th_work_interval == NULL);
602
603 thread_unlock(thread);
604 /* splsched */
605
606 thread_block((thread_continue_t)thread_terminate_continue);
607 /*NOTREACHED*/
608 }
609
610 static bool
611 thread_ref_release(thread_t thread)
612 {
613 if (thread == THREAD_NULL) {
614 return false;
615 }
616
617 assert_thread_magic(thread);
618
619 return os_ref_release(&thread->ref_count) == 0;
620 }
621
622 /* Drop a thread refcount safely without triggering a zfree */
623 void
624 thread_deallocate_safe(thread_t thread)
625 {
626 if (__improbable(thread_ref_release(thread))) {
627 /* enqueue the thread for the thread deallocate daemon to call thread_deallocate_complete */
628 thread_deallocate_enqueue(thread);
629 }
630 }
631
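/*
 * thread_deallocate:
 *
 * Drop a thread reference; the final release tears the thread down
 * inline via thread_deallocate_complete(), which may zfree and take
 * locks.  Callers that cannot tolerate that use thread_deallocate_safe()
 * above, which defers the work to the thread deallocate daemon.
 */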
632 void
633 thread_deallocate(thread_t thread)
634 {
635 if (__improbable(thread_ref_release(thread))) {
636 thread_deallocate_complete(thread);
637 }
638 }
639
640 void
641 thread_deallocate_complete(
642 thread_t thread)
643 {
644 task_t task;
645
646 assert_thread_magic(thread);
647
648 assert(os_ref_get_count(&thread->ref_count) == 0);
649
650 if (!(thread->state & TH_TERMINATE2)) {
651 panic("thread_deallocate: thread not properly terminated\n");
652 }
653
654 assert(thread->runq == PROCESSOR_NULL);
655
656 #if KPC
657 kpc_thread_destroy(thread);
658 #endif
659
660 ipc_thread_terminate(thread);
661
662 proc_thread_qos_deallocate(thread);
663
664 task = thread->task;
665
666 #ifdef MACH_BSD
667 {
668 void *ut = thread->uthread;
669
670 thread->uthread = NULL;
671 uthread_zone_free(ut);
672 }
673 #endif /* MACH_BSD */
674
675 if (thread->t_ledger) {
676 ledger_dereference(thread->t_ledger);
677 }
678 if (thread->t_threadledger) {
679 ledger_dereference(thread->t_threadledger);
680 }
681
682 assert(thread->turnstile != TURNSTILE_NULL);
683 if (thread->turnstile) {
684 turnstile_deallocate(thread->turnstile);
685 }
686
687 if (IPC_VOUCHER_NULL != thread->ith_voucher) {
688 ipc_voucher_release(thread->ith_voucher);
689 }
690
691 if (thread->thread_io_stats) {
692 kheap_free(KHEAP_DATA_BUFFERS, thread->thread_io_stats,
693 sizeof(struct io_stat_info));
694 }
695
696 if (thread->kernel_stack != 0) {
697 stack_free(thread);
698 }
699
700 lck_mtx_destroy(&thread->mutex, &thread_lck_grp);
701 machine_thread_destroy(thread);
702
703 task_deallocate(task);
704
705 #if MACH_ASSERT
706 assert_thread_magic(thread);
707 thread->thread_magic = 0;
708 #endif /* MACH_ASSERT */
709
710 lck_mtx_lock(&tasks_threads_lock);
711 assert(terminated_threads_count > 0);
712 queue_remove(&terminated_threads, thread, thread_t, threads);
713 terminated_threads_count--;
714 lck_mtx_unlock(&tasks_threads_lock);
715
716 zfree(thread_zone, thread);
717 }
718
719 /*
720 * thread_inspect_deallocate:
721 *
722 * Drop a thread inspection reference.
723 */
724 void
725 thread_inspect_deallocate(
726 thread_inspect_t thread_inspect)
727 {
728 return thread_deallocate((thread_t)thread_inspect);
729 }
730
731 /*
732 * thread_read_deallocate:
733 *
734 * Drop a reference on thread read port.
735 */
736 void
737 thread_read_deallocate(
738 thread_read_t thread_read)
739 {
740 return thread_deallocate((thread_t)thread_read);
741 }
742
743
744 /*
745 * thread_exception_queue_invoke:
746 *
747 * Deliver EXC_{RESOURCE,GUARD} exception
748 */
749 static void
750 thread_exception_queue_invoke(mpsc_queue_chain_t elm,
751 __assert_only mpsc_daemon_queue_t dq)
752 {
753 struct thread_exception_elt *elt;
754 task_t task;
755 thread_t thread;
756 exception_type_t etype;
757
758 assert(dq == &thread_exception_queue);
759 elt = mpsc_queue_element(elm, struct thread_exception_elt, link);
760
761 etype = elt->exception_type;
762 task = elt->exception_task;
763 thread = elt->exception_thread;
764 assert_thread_magic(thread);
765
766 kfree(elt, sizeof(*elt));
767
768 /* wait for all the threads in the task to terminate */
769 task_lock(task);
770 task_wait_till_threads_terminate_locked(task);
771 task_unlock(task);
772
773 /* Consumes the task ref returned by task_generate_corpse_internal */
774 task_deallocate(task);
775 /* Consumes the thread ref returned by task_generate_corpse_internal */
776 thread_deallocate(thread);
777
778 /* Deliver the notification, also clears the corpse. */
779 task_deliver_crash_notification(task, thread, etype, 0);
780 }
781
782 /*
783 * thread_exception_enqueue:
784 *
785 * Enqueue a corpse port to be delivered an EXC_{RESOURCE,GUARD}.
786 */
787 void
788 thread_exception_enqueue(
789 task_t task,
790 thread_t thread,
791 exception_type_t etype)
792 {
793 assert(EXC_RESOURCE == etype || EXC_GUARD == etype);
794 struct thread_exception_elt *elt = kalloc(sizeof(*elt));
795 elt->exception_type = etype;
796 elt->exception_task = task;
797 elt->exception_thread = thread;
798
799 mpsc_daemon_enqueue(&thread_exception_queue, &elt->link,
800 MPSC_QUEUE_DISABLE_PREEMPTION);
801 }
802
803 /*
804 * thread_copy_resource_info
805 *
806 * Copy the resource info counters from source
807 * thread to destination thread.
808 */
809 void
810 thread_copy_resource_info(
811 thread_t dst_thread,
812 thread_t src_thread)
813 {
814 dst_thread->c_switch = src_thread->c_switch;
815 dst_thread->p_switch = src_thread->p_switch;
816 dst_thread->ps_switch = src_thread->ps_switch;
817 dst_thread->precise_user_kernel_time = src_thread->precise_user_kernel_time;
818 dst_thread->user_timer = src_thread->user_timer;
819 dst_thread->user_timer_save = src_thread->user_timer_save;
820 dst_thread->system_timer = src_thread->system_timer;
821 dst_thread->system_timer_save = src_thread->system_timer_save;
822 dst_thread->runnable_timer = src_thread->runnable_timer;
823 dst_thread->vtimer_user_save = src_thread->vtimer_user_save;
824 dst_thread->vtimer_prof_save = src_thread->vtimer_prof_save;
825 dst_thread->vtimer_rlim_save = src_thread->vtimer_rlim_save;
826 dst_thread->vtimer_qos_save = src_thread->vtimer_qos_save;
827 dst_thread->syscalls_unix = src_thread->syscalls_unix;
828 dst_thread->syscalls_mach = src_thread->syscalls_mach;
829 ledger_rollup(dst_thread->t_threadledger, src_thread->t_threadledger);
830 *dst_thread->thread_io_stats = *src_thread->thread_io_stats;
831 }
832
833 static void
834 thread_terminate_queue_invoke(mpsc_queue_chain_t e,
835 __assert_only mpsc_daemon_queue_t dq)
836 {
837 thread_t thread = mpsc_queue_element(e, struct thread, mpsc_links);
838 task_t task = thread->task;
839
840 assert(dq == &thread_terminate_queue);
841
842 task_lock(task);
843
844 /*
845  * If marked for crash reporting, skip reaping.
846  * The corpse delivery thread will clear the bit and enqueue
847  * the thread for reaping when done.
848 *
849 * Note: the inspection field is set under the task lock
850 *
851 * FIXME[mad]: why enqueue for termination before `inspection` is false ?
852 */
853 if (__improbable(thread->inspection)) {
854 simple_lock(&crashed_threads_lock, &thread_lck_grp);
855 task_unlock(task);
856
857 enqueue_tail(&crashed_threads_queue, &thread->runq_links);
858 simple_unlock(&crashed_threads_lock);
859 return;
860 }
861
862
863 task->total_user_time += timer_grab(&thread->user_timer);
864 task->total_ptime += timer_grab(&thread->ptime);
865 task->total_runnable_time += timer_grab(&thread->runnable_timer);
866 if (thread->precise_user_kernel_time) {
867 task->total_system_time += timer_grab(&thread->system_timer);
868 } else {
869 task->total_user_time += timer_grab(&thread->system_timer);
870 }
871
872 task->c_switch += thread->c_switch;
873 task->p_switch += thread->p_switch;
874 task->ps_switch += thread->ps_switch;
875
876 task->syscalls_unix += thread->syscalls_unix;
877 task->syscalls_mach += thread->syscalls_mach;
878
879 task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
880 task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
881 task->task_gpu_ns += ml_gpu_stat(thread);
882 task->task_energy += ml_energy_stat(thread);
883 task->decompressions += thread->decompressions;
884
885 #if MONOTONIC
886 mt_terminate_update(task, thread);
887 #endif /* MONOTONIC */
888
889 thread_update_qos_cpu_time(thread);
890
891 queue_remove(&task->threads, thread, thread_t, task_threads);
892 task->thread_count--;
893
894 /*
895 * If the task is being halted, and there is only one thread
896 * left in the task after this one, then wakeup that thread.
897 */
898 if (task->thread_count == 1 && task->halting) {
899 thread_wakeup((event_t)&task->halting);
900 }
901
902 task_unlock(task);
903
904 lck_mtx_lock(&tasks_threads_lock);
905 queue_remove(&threads, thread, thread_t, threads);
906 threads_count--;
907 queue_enter(&terminated_threads, thread, thread_t, threads);
908 terminated_threads_count++;
909 lck_mtx_unlock(&tasks_threads_lock);
910
911 thread_deallocate(thread);
912 }
913
914 static void
915 thread_deallocate_queue_invoke(mpsc_queue_chain_t e,
916 __assert_only mpsc_daemon_queue_t dq)
917 {
918 thread_t thread = mpsc_queue_element(e, struct thread, mpsc_links);
919
920 assert(dq == &thread_deallocate_queue);
921
922 thread_deallocate_complete(thread);
923 }
924
925 /*
926 * thread_terminate_enqueue:
927 *
928 * Enqueue a terminating thread for final disposition.
929 *
930 * Called at splsched.
931 */
932 void
933 thread_terminate_enqueue(
934 thread_t thread)
935 {
936 KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE, thread->thread_id);
937
938 mpsc_daemon_enqueue(&thread_terminate_queue, &thread->mpsc_links,
939 MPSC_QUEUE_DISABLE_PREEMPTION);
940 }
941
942 /*
943 * thread_deallocate_enqueue:
944 *
945 * Enqueue a thread for final deallocation.
946 */
947 static void
948 thread_deallocate_enqueue(
949 thread_t thread)
950 {
951 mpsc_daemon_enqueue(&thread_deallocate_queue, &thread->mpsc_links,
952 MPSC_QUEUE_DISABLE_PREEMPTION);
953 }
954
955 /*
956 * thread_terminate_crashed_threads:
957  * walk the list of crashed threads and re-enqueue for termination
958  * any threads that are no longer being inspected.
959 */
960 void
961 thread_terminate_crashed_threads(void)
962 {
963 thread_t th_remove;
964
965 simple_lock(&crashed_threads_lock, &thread_lck_grp);
966 /*
967  * Loop through the crashed threads queue and hand any threads
968  * that are no longer being inspected back to the terminate queue.
969 */
970
971 qe_foreach_element_safe(th_remove, &crashed_threads_queue, runq_links) {
972 /* make sure current_thread is never in crashed queue */
973 assert(th_remove != current_thread());
974
975 if (th_remove->inspection == FALSE) {
976 remqueue(&th_remove->runq_links);
977 mpsc_daemon_enqueue(&thread_terminate_queue, &th_remove->mpsc_links,
978 MPSC_QUEUE_NONE);
979 }
980 }
981
982 simple_unlock(&crashed_threads_lock);
983 }
984
985 /*
986 * thread_stack_queue_invoke:
987 *
988 * Perform stack allocation as required due to
989 * invoke failures.
990 */
991 static void
992 thread_stack_queue_invoke(mpsc_queue_chain_t elm,
993 __assert_only mpsc_daemon_queue_t dq)
994 {
995 thread_t thread = mpsc_queue_element(elm, struct thread, mpsc_links);
996
997 assert(dq == &thread_stack_queue);
998
999 /* allocate stack with interrupts enabled so that we can call into VM */
1000 stack_alloc(thread);
1001
1002 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_WAIT) | DBG_FUNC_END, thread_tid(thread), 0, 0, 0, 0);
1003
1004 spl_t s = splsched();
1005 thread_lock(thread);
1006 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1007 thread_unlock(thread);
1008 splx(s);
1009 }
1010
1011 /*
1012 * thread_stack_enqueue:
1013 *
1014 * Enqueue a thread for stack allocation.
1015 *
1016 * Called at splsched.
1017 */
1018 void
1019 thread_stack_enqueue(
1020 thread_t thread)
1021 {
1022 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_WAIT) | DBG_FUNC_START, thread_tid(thread), 0, 0, 0, 0);
1023 assert_thread_magic(thread);
1024
1025 mpsc_daemon_enqueue(&thread_stack_queue, &thread->mpsc_links,
1026 MPSC_QUEUE_DISABLE_PREEMPTION);
1027 }
1028
1029 void
1030 thread_daemon_init(void)
1031 {
1032 kern_return_t result;
1033
1034 thread_deallocate_daemon_init();
1035
1036 thread_deallocate_daemon_register_queue(&thread_terminate_queue,
1037 thread_terminate_queue_invoke);
1038
1039 thread_deallocate_daemon_register_queue(&thread_deallocate_queue,
1040 thread_deallocate_queue_invoke);
1041
1042 simple_lock_init(&crashed_threads_lock, 0);
1043 queue_init(&crashed_threads_queue);
1044
1045 result = mpsc_daemon_queue_init_with_thread(&thread_stack_queue,
1046 thread_stack_queue_invoke, BASEPRI_PREEMPT_HIGH,
1047 "daemon.thread-stack");
1048 if (result != KERN_SUCCESS) {
1049 panic("thread_daemon_init: thread_stack_daemon");
1050 }
1051
1052 result = mpsc_daemon_queue_init_with_thread(&thread_exception_queue,
1053 thread_exception_queue_invoke, MINPRI_KERNEL,
1054 "daemon.thread-exception");
1055 if (result != KERN_SUCCESS) {
1056 panic("thread_daemon_init: thread_exception_daemon");
1057 }
1058 }
1059
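/*
 * Options for thread_create_internal().  TH_OPTION_IMMOVABLE and
 * TH_OPTION_PINNED are translated into the corresponding
 * IPC_THREAD_INIT_* flags and control how ipc_thread_init() sets up
 * the new thread's control port.
 */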
1060 __options_decl(thread_create_internal_options_t, uint32_t, {
1061 TH_OPTION_NONE = 0x00,
1062 TH_OPTION_NOCRED = 0x01,
1063 TH_OPTION_NOSUSP = 0x02,
1064 TH_OPTION_WORKQ = 0x04,
1065 TH_OPTION_IMMOVABLE = 0x08,
1066 TH_OPTION_PINNED = 0x10,
1067 });
1068
1069 /*
1070 * Create a new thread.
1071 * Doesn't start the thread running.
1072 *
1073 * Task and tasks_threads_lock are returned locked on success.
1074 */
1075 static kern_return_t
1076 thread_create_internal(
1077 task_t parent_task,
1078 integer_t priority,
1079 thread_continue_t continuation,
1080 void *parameter,
1081 thread_create_internal_options_t options,
1082 thread_t *out_thread)
1083 {
1084 thread_t new_thread;
1085 static thread_t first_thread;
1086 ipc_thread_init_options_t init_options = IPC_THREAD_INIT_NONE;
1087
1088 /*
1089 * Allocate a thread and initialize static fields
1090 */
1091 if (first_thread == THREAD_NULL) {
1092 new_thread = first_thread = current_thread();
1093 } else {
1094 new_thread = (thread_t)zalloc(thread_zone);
1095 }
1096 if (new_thread == THREAD_NULL) {
1097 return KERN_RESOURCE_SHORTAGE;
1098 }
1099
1100 if (new_thread != first_thread) {
1101 init_thread_from_template(new_thread);
1102 }
1103
1104 if (options & TH_OPTION_PINNED) {
1105 init_options |= IPC_THREAD_INIT_PINNED;
1106 }
1107
1108 if (options & TH_OPTION_IMMOVABLE) {
1109 init_options |= IPC_THREAD_INIT_IMMOVABLE;
1110 }
1111
1112 os_ref_init_count(&new_thread->ref_count, &thread_refgrp, 2);
1113 #if DEBUG || DEVELOPMENT
1114 queue_init(&new_thread->t_temp_alloc_list);
1115 #endif /* DEBUG || DEVELOPMENT */
1116
1117 #ifdef MACH_BSD
1118 new_thread->uthread = uthread_alloc(parent_task, new_thread, (options & TH_OPTION_NOCRED) != 0);
1119 if (new_thread->uthread == NULL) {
1120 #if MACH_ASSERT
1121 new_thread->thread_magic = 0;
1122 #endif /* MACH_ASSERT */
1123
1124 zfree(thread_zone, new_thread);
1125 return KERN_RESOURCE_SHORTAGE;
1126 }
1127 #endif /* MACH_BSD */
1128
1129 if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
1130 #ifdef MACH_BSD
1131 void *ut = new_thread->uthread;
1132
1133 new_thread->uthread = NULL;
1134 /* cred free may not be necessary */
1135 uthread_cleanup(parent_task, ut, parent_task->bsd_info);
1136 uthread_cred_free(ut);
1137 uthread_zone_free(ut);
1138 #endif /* MACH_BSD */
1139
1140 #if MACH_ASSERT
1141 new_thread->thread_magic = 0;
1142 #endif /* MACH_ASSERT */
1143
1144 zfree(thread_zone, new_thread);
1145 return KERN_FAILURE;
1146 }
1147
1148 new_thread->task = parent_task;
1149
1150 thread_lock_init(new_thread);
1151 wake_lock_init(new_thread);
1152
1153 lck_mtx_init(&new_thread->mutex, &thread_lck_grp, LCK_ATTR_NULL);
1154
1155 ipc_thread_init(new_thread, init_options);
1156
1157 new_thread->continuation = continuation;
1158 new_thread->parameter = parameter;
1159 new_thread->inheritor_flags = TURNSTILE_UPDATE_FLAGS_NONE;
1160 priority_queue_init(&new_thread->sched_inheritor_queue);
1161 priority_queue_init(&new_thread->base_inheritor_queue);
1162 #if CONFIG_SCHED_CLUTCH
1163 priority_queue_entry_init(&new_thread->th_clutch_runq_link);
1164 priority_queue_entry_init(&new_thread->th_clutch_pri_link);
1165 #endif /* CONFIG_SCHED_CLUTCH */
1166
1167 #if CONFIG_SCHED_EDGE
1168 new_thread->th_bound_cluster_enqueued = false;
1169 #endif /* CONFIG_SCHED_EDGE */
1170
1171 /* Allocate I/O Statistics structure */
1172 new_thread->thread_io_stats = kheap_alloc(KHEAP_DATA_BUFFERS,
1173 sizeof(struct io_stat_info), Z_WAITOK | Z_ZERO);
1174 assert(new_thread->thread_io_stats != NULL);
1175
1176 #if KASAN
1177 kasan_init_thread(&new_thread->kasan_data);
1178 #endif
1179
1180 #if CONFIG_KSANCOV
1181 new_thread->ksancov_data = NULL;
1182 #endif
1183
1184 #if CONFIG_IOSCHED
1185 /* Clear out the I/O Scheduling info for AppleFSCompression */
1186 new_thread->decmp_upl = NULL;
1187 #endif /* CONFIG_IOSCHED */
1188
1189 new_thread->thread_region_page_shift = 0;
1190
1191 #if DEVELOPMENT || DEBUG
1192 task_lock(parent_task);
1193 uint16_t thread_limit = parent_task->task_thread_limit;
1194 if (exc_resource_threads_enabled &&
1195 thread_limit > 0 &&
1196 parent_task->thread_count >= thread_limit &&
1197 !parent_task->task_has_crossed_thread_limit &&
1198 !(parent_task->t_flags & TF_CORPSE)) {
1199 int thread_count = parent_task->thread_count;
1200 parent_task->task_has_crossed_thread_limit = TRUE;
1201 task_unlock(parent_task);
1202 SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(parent_task, thread_count);
1203 } else {
1204 task_unlock(parent_task);
1205 }
1206 #endif
1207
1208 lck_mtx_lock(&tasks_threads_lock);
1209 task_lock(parent_task);
1210
1211 /*
1212  * Fail thread creation if parent task is being torn down or has too many threads.
1213  * If the caller asked for TH_OPTION_NOSUSP, also fail if the parent task is suspended.
1214 */
1215 if (parent_task->active == 0 || parent_task->halting ||
1216 (parent_task->suspend_count > 0 && (options & TH_OPTION_NOSUSP) != 0) ||
1217 (parent_task->thread_count >= task_threadmax && parent_task != kernel_task)) {
1218 task_unlock(parent_task);
1219 lck_mtx_unlock(&tasks_threads_lock);
1220
1221 #ifdef MACH_BSD
1222 {
1223 void *ut = new_thread->uthread;
1224
1225 new_thread->uthread = NULL;
1226 uthread_cleanup(parent_task, ut, parent_task->bsd_info);
1227 /* cred free may not be necessary */
1228 uthread_cred_free(ut);
1229 uthread_zone_free(ut);
1230 }
1231 #endif /* MACH_BSD */
1232 ipc_thread_disable(new_thread);
1233 ipc_thread_terminate(new_thread);
1234 kheap_free(KHEAP_DATA_BUFFERS, new_thread->thread_io_stats,
1235 sizeof(struct io_stat_info));
1236 lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp);
1237 machine_thread_destroy(new_thread);
1238 zfree(thread_zone, new_thread);
1239 return KERN_FAILURE;
1240 }
1241
1242 /* Protected by the tasks_threads_lock */
1243 new_thread->thread_id = ++thread_unique_id;
1244
1245 /* New threads inherit any default state on the task */
1246 machine_thread_inherit_taskwide(new_thread, parent_task);
1247
1248 task_reference_internal(parent_task);
1249
1250 if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
1251 /*
1252 * This task has a per-thread CPU limit; make sure this new thread
1253 * gets its limit set too, before it gets out of the kernel.
1254 */
1255 act_set_astledger(new_thread);
1256 }
1257
1258 /* Instantiate a thread ledger. Do not fail thread creation if ledger creation fails. */
1259 if ((new_thread->t_threadledger = ledger_instantiate(thread_ledger_template,
1260 LEDGER_CREATE_INACTIVE_ENTRIES)) != LEDGER_NULL) {
1261 ledger_entry_setactive(new_thread->t_threadledger, thread_ledgers.cpu_time);
1262 }
1263
1264 new_thread->t_bankledger = LEDGER_NULL;
1265 new_thread->t_deduct_bank_ledger_time = 0;
1266 new_thread->t_deduct_bank_ledger_energy = 0;
1267
1268 new_thread->t_ledger = new_thread->task->ledger;
1269 if (new_thread->t_ledger) {
1270 ledger_reference(new_thread->t_ledger);
1271 }
1272
1273 #if defined(CONFIG_SCHED_MULTIQ)
1274 /* Cache the task's sched_group */
1275 new_thread->sched_group = parent_task->sched_group;
1276 #endif /* defined(CONFIG_SCHED_MULTIQ) */
1277
1278 /* Cache the task's map */
1279 new_thread->map = parent_task->map;
1280
1281 timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
1282 timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);
1283
1284 #if KPC
1285 kpc_thread_create(new_thread);
1286 #endif
1287
1288 /* Set the thread's scheduling parameters */
1289 new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
1290 new_thread->max_priority = parent_task->max_priority;
1291 new_thread->task_priority = parent_task->priority;
1292
1293 #if CONFIG_THREAD_GROUPS
1294 thread_group_init_thread(new_thread, parent_task);
1295 #endif /* CONFIG_THREAD_GROUPS */
1296
1297 int new_priority = (priority < 0) ? parent_task->priority : priority;
1298
1299 if (new_priority > new_thread->max_priority) {
1300 new_priority = new_thread->max_priority;
1301 }
1302 #if !defined(XNU_TARGET_OS_OSX)
1303 if (new_priority < MAXPRI_THROTTLE) {
1304 new_priority = MAXPRI_THROTTLE;
1305 }
1306 #endif /* !defined(XNU_TARGET_OS_OSX) */
1307
1308 new_thread->importance = new_priority - new_thread->task_priority;
1309
1310 sched_set_thread_base_priority(new_thread, new_priority);
1311
1312 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
1313 new_thread->sched_stamp = sched_tick;
1314 #if CONFIG_SCHED_CLUTCH
1315 new_thread->pri_shift = sched_clutch_thread_pri_shift(new_thread, new_thread->th_sched_bucket);
1316 #else /* CONFIG_SCHED_CLUTCH */
1317 new_thread->pri_shift = sched_pri_shifts[new_thread->th_sched_bucket];
1318 #endif /* CONFIG_SCHED_CLUTCH */
1319 #endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) */
1320
1321 if (parent_task->max_priority <= MAXPRI_THROTTLE) {
1322 sched_thread_mode_demote(new_thread, TH_SFLAG_THROTTLED);
1323 }
1324
1325 thread_policy_create(new_thread);
1326
1327 /* Chain the thread onto the task's list */
1328 queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
1329 parent_task->thread_count++;
1330
1331 /* So terminating threads don't need to take the task lock to decrement */
1332 os_atomic_inc(&parent_task->active_thread_count, relaxed);
1333
1334 queue_enter(&threads, new_thread, thread_t, threads);
1335 threads_count++;
1336
1337 new_thread->active = TRUE;
1338 if (task_is_a_corpse_fork(parent_task)) {
1339 /* Set the inspection bit if the task is a corpse fork */
1340 new_thread->inspection = TRUE;
1341 } else {
1342 new_thread->inspection = FALSE;
1343 }
1344 new_thread->corpse_dup = FALSE;
1345 new_thread->turnstile = turnstile_alloc();
1346
1347
1348 *out_thread = new_thread;
1349
1350 if (kdebug_enable) {
1351 long args[4] = {};
1352
1353 kdbg_trace_data(parent_task->bsd_info, &args[1], &args[3]);
1354
1355 /*
1356 * Starting with 26604425, exec'ing creates a new task/thread.
1357 *
1358 * NEWTHREAD in the current process has two possible meanings:
1359 *
1360 * 1) Create a new thread for this process.
1361 * 2) Create a new thread for the future process this will become in an
1362 * exec.
1363 *
1364 * To disambiguate these, arg3 will be set to TRUE for case #2.
1365 *
1366 * The value we need to find (TPF_EXEC_COPY) is stable in the case of a
1367 * task exec'ing. The read of t_procflags does not take the proc_lock.
1368 */
1369 args[2] = task_is_exec_copy(parent_task) ? 1 : 0;
1370
1371 KDBG_RELEASE(TRACE_DATA_NEWTHREAD, (uintptr_t)thread_tid(new_thread),
1372 args[1], args[2], args[3]);
1373
1374 kdbg_trace_string(parent_task->bsd_info, &args[0], &args[1],
1375 &args[2], &args[3]);
1376 KDBG_RELEASE(TRACE_STRING_NEWTHREAD, args[0], args[1], args[2],
1377 args[3]);
1378 }
1379
1380 DTRACE_PROC1(lwp__create, thread_t, *out_thread);
1381
1382 return KERN_SUCCESS;
1383 }
1384
1385 static kern_return_t
1386 thread_create_with_options_internal(
1387 task_t task,
1388 thread_t *new_thread,
1389 boolean_t from_user,
1390 thread_create_internal_options_t options,
1391 thread_continue_t continuation)
1392 {
1393 kern_return_t result;
1394 thread_t thread;
1395
1396 if (task == TASK_NULL || task == kernel_task) {
1397 return KERN_INVALID_ARGUMENT;
1398 }
1399
1400 #if CONFIG_MACF
1401 if (from_user && current_task() != task &&
1402 mac_proc_check_remote_thread_create(task, -1, NULL, 0) != 0) {
1403 return KERN_DENIED;
1404 }
1405 #endif
1406
1407 result = thread_create_internal(task, -1, continuation, NULL, options, &thread);
1408 if (result != KERN_SUCCESS) {
1409 return result;
1410 }
1411
1412 thread->user_stop_count = 1;
1413 thread_hold(thread);
1414 if (task->suspend_count > 0) {
1415 thread_hold(thread);
1416 }
1417
1418 if (from_user) {
1419 extmod_statistics_incr_thread_create(task);
1420 }
1421
1422 task_unlock(task);
1423 lck_mtx_unlock(&tasks_threads_lock);
1424
1425 *new_thread = thread;
1426
1427 return KERN_SUCCESS;
1428 }
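/*
 * Note: threads created through this path start suspended (user_stop_count
 * is set to 1 and thread_hold() is applied above); the caller is expected
 * to resume them, e.g. via thread_resume().
 */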
1429
1430 /* No prototype, since task_server.h has the _from_user version if KERNEL_SERVER */
1431 kern_return_t
1432 thread_create(
1433 task_t task,
1434 thread_t *new_thread);
1435
1436 kern_return_t
1437 thread_create(
1438 task_t task,
1439 thread_t *new_thread)
1440 {
1441 return thread_create_with_options_internal(task, new_thread, FALSE, TH_OPTION_NONE,
1442 (thread_continue_t)thread_bootstrap_return);
1443 }
1444
1445 /*
1446 * Create a thread that has its itk_self pinned
1447  * Deprecated, should be cleaned up once rdar://70892168 lands
1448 */
1449 kern_return_t
1450 thread_create_pinned(
1451 task_t task,
1452 thread_t *new_thread)
1453 {
1454 return thread_create_with_options_internal(task, new_thread, FALSE,
1455 TH_OPTION_PINNED | TH_OPTION_IMMOVABLE, (thread_continue_t)thread_bootstrap_return);
1456 }
1457
1458 kern_return_t
1459 thread_create_immovable(
1460 task_t task,
1461 thread_t *new_thread)
1462 {
1463 return thread_create_with_options_internal(task, new_thread, FALSE,
1464 TH_OPTION_IMMOVABLE, (thread_continue_t)thread_bootstrap_return);
1465 }
1466
1467 kern_return_t
1468 thread_create_from_user(
1469 task_t task,
1470 thread_t *new_thread)
1471 {
1472 return thread_create_with_options_internal(task, new_thread, TRUE, TH_OPTION_NONE,
1473 (thread_continue_t)thread_bootstrap_return);
1474 }
1475
1476 kern_return_t
1477 thread_create_with_continuation(
1478 task_t task,
1479 thread_t *new_thread,
1480 thread_continue_t continuation)
1481 {
1482 return thread_create_with_options_internal(task, new_thread, FALSE, TH_OPTION_NONE, continuation);
1483 }
1484
1485 /*
1486 * Create a thread that is already started, but is waiting on an event
1487 */
1488 static kern_return_t
1489 thread_create_waiting_internal(
1490 task_t task,
1491 thread_continue_t continuation,
1492 event_t event,
1493 block_hint_t block_hint,
1494 int options,
1495 thread_t *new_thread)
1496 {
1497 kern_return_t result;
1498 thread_t thread;
1499
1500 if (task == TASK_NULL || task == kernel_task) {
1501 return KERN_INVALID_ARGUMENT;
1502 }
1503
1504 result = thread_create_internal(task, -1, continuation, NULL,
1505 options, &thread);
1506 if (result != KERN_SUCCESS) {
1507 return result;
1508 }
1509
1510 /* note no user_stop_count or thread_hold here */
1511
1512 if (task->suspend_count > 0) {
1513 thread_hold(thread);
1514 }
1515
1516 thread_mtx_lock(thread);
1517 thread_set_pending_block_hint(thread, block_hint);
1518 if (options & TH_OPTION_WORKQ) {
1519 thread->static_param = true;
1520 event = workq_thread_init_and_wq_lock(task, thread);
1521 }
1522 thread_start_in_assert_wait(thread, event, THREAD_INTERRUPTIBLE);
1523 thread_mtx_unlock(thread);
1524
1525 task_unlock(task);
1526 lck_mtx_unlock(&tasks_threads_lock);
1527
1528 *new_thread = thread;
1529
1530 return KERN_SUCCESS;
1531 }
1532
1533 kern_return_t
1534 thread_create_waiting(
1535 task_t task,
1536 thread_continue_t continuation,
1537 event_t event,
1538 th_create_waiting_options_t options,
1539 thread_t *new_thread)
1540 {
1541 thread_create_internal_options_t ci_options = TH_OPTION_NONE;
1542
1543 assert((options & ~TH_CREATE_WAITING_OPTION_MASK) == 0);
1544 if (options & TH_CREATE_WAITING_OPTION_PINNED) {
1545 ci_options |= TH_OPTION_PINNED;
1546 }
1547 if (options & TH_CREATE_WAITING_OPTION_IMMOVABLE) {
1548 ci_options |= TH_OPTION_IMMOVABLE;
1549 }
1550
1551 return thread_create_waiting_internal(task, continuation, event,
1552 kThreadWaitNone, ci_options, new_thread);
1553 }
1554
1555
1556 static kern_return_t
1557 thread_create_running_internal2(
1558 task_t task,
1559 int flavor,
1560 thread_state_t new_state,
1561 mach_msg_type_number_t new_state_count,
1562 thread_t *new_thread,
1563 boolean_t from_user)
1564 {
1565 kern_return_t result;
1566 thread_t thread;
1567
1568 if (task == TASK_NULL || task == kernel_task) {
1569 return KERN_INVALID_ARGUMENT;
1570 }
1571
1572 #if CONFIG_MACF
1573 if (from_user && current_task() != task &&
1574 mac_proc_check_remote_thread_create(task, flavor, new_state, new_state_count) != 0) {
1575 return KERN_DENIED;
1576 }
1577 #endif
1578
1579 result = thread_create_internal(task, -1,
1580 (thread_continue_t)thread_bootstrap_return, NULL,
1581 TH_OPTION_NONE, &thread);
1582 if (result != KERN_SUCCESS) {
1583 return result;
1584 }
1585
1586 if (task->suspend_count > 0) {
1587 thread_hold(thread);
1588 }
1589
1590 if (from_user) {
1591 result = machine_thread_state_convert_from_user(thread, flavor,
1592 new_state, new_state_count);
1593 }
1594 if (result == KERN_SUCCESS) {
1595 result = machine_thread_set_state(thread, flavor, new_state,
1596 new_state_count);
1597 }
1598 if (result != KERN_SUCCESS) {
1599 task_unlock(task);
1600 lck_mtx_unlock(&tasks_threads_lock);
1601
1602 thread_terminate(thread);
1603 thread_deallocate(thread);
1604 return result;
1605 }
1606
1607 thread_mtx_lock(thread);
1608 thread_start(thread);
1609 thread_mtx_unlock(thread);
1610
1611 if (from_user) {
1612 extmod_statistics_incr_thread_create(task);
1613 }
1614
1615 task_unlock(task);
1616 lck_mtx_unlock(&tasks_threads_lock);
1617
1618 *new_thread = thread;
1619
1620 return result;
1621 }
1622
1623 /* Prototype, see justification above */
1624 kern_return_t
1625 thread_create_running(
1626 task_t task,
1627 int flavor,
1628 thread_state_t new_state,
1629 mach_msg_type_number_t new_state_count,
1630 thread_t *new_thread);
1631
1632 kern_return_t
1633 thread_create_running(
1634 task_t task,
1635 int flavor,
1636 thread_state_t new_state,
1637 mach_msg_type_number_t new_state_count,
1638 thread_t *new_thread)
1639 {
1640 return thread_create_running_internal2(
1641 task, flavor, new_state, new_state_count,
1642 new_thread, FALSE);
1643 }
1644
1645 kern_return_t
1646 thread_create_running_from_user(
1647 task_t task,
1648 int flavor,
1649 thread_state_t new_state,
1650 mach_msg_type_number_t new_state_count,
1651 thread_t *new_thread)
1652 {
1653 return thread_create_running_internal2(
1654 task, flavor, new_state, new_state_count,
1655 new_thread, TRUE);
1656 }
1657
1658 kern_return_t
1659 thread_create_workq_waiting(
1660 task_t task,
1661 thread_continue_t continuation,
1662 thread_t *new_thread)
1663 {
1664 /*
1665 * Create thread, but don't pin control port just yet, in case someone calls
1666 * task_threads() and deallocates pinned port before kernel copyout happens,
1667 * which will result in pinned port guard exception. Instead, pin and make
1668 * it immovable atomically at copyout during workq_setup_and_run().
1669 */
1670 int options = TH_OPTION_NOCRED | TH_OPTION_NOSUSP | TH_OPTION_WORKQ | TH_OPTION_IMMOVABLE;
1671 return thread_create_waiting_internal(task, continuation, NULL,
1672 kThreadWaitParkedWorkQueue, options, new_thread);
1673 }
1674
1675 /*
1676 * kernel_thread_create:
1677 *
1678 * Create a thread in the kernel task
1679 * to execute in kernel context.
1680 */
1681 kern_return_t
1682 kernel_thread_create(
1683 thread_continue_t continuation,
1684 void *parameter,
1685 integer_t priority,
1686 thread_t *new_thread)
1687 {
1688 kern_return_t result;
1689 thread_t thread;
1690 task_t task = kernel_task;
1691
1692 result = thread_create_internal(task, priority, continuation, parameter,
1693 TH_OPTION_NOCRED | TH_OPTION_NONE, &thread);
1694 if (result != KERN_SUCCESS) {
1695 return result;
1696 }
1697
1698 task_unlock(task);
1699 lck_mtx_unlock(&tasks_threads_lock);
1700
1701 stack_alloc(thread);
1702 assert(thread->kernel_stack != 0);
1703 #if !defined(XNU_TARGET_OS_OSX)
1704 if (priority > BASEPRI_KERNEL)
1705 #endif
1706 thread->reserved_stack = thread->kernel_stack;
1707
1708 if (debug_task & 1) {
1709 kprintf("kernel_thread_create: thread = %p continuation = %p\n", thread, continuation);
1710 }
1711 *new_thread = thread;
1712
1713 return result;
1714 }
1715
1716 kern_return_t
1717 kernel_thread_start_priority(
1718 thread_continue_t continuation,
1719 void *parameter,
1720 integer_t priority,
1721 thread_t *new_thread)
1722 {
1723 kern_return_t result;
1724 thread_t thread;
1725
1726 result = kernel_thread_create(continuation, parameter, priority, &thread);
1727 if (result != KERN_SUCCESS) {
1728 return result;
1729 }
1730
1731 *new_thread = thread;
1732
1733 thread_mtx_lock(thread);
1734 thread_start(thread);
1735 thread_mtx_unlock(thread);
1736
1737 return result;
1738 }
1739
1740 kern_return_t
1741 kernel_thread_start(
1742 thread_continue_t continuation,
1743 void *parameter,
1744 thread_t *new_thread)
1745 {
1746 return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
1747 }
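/*
 * Illustrative usage sketch (not part of this file): a typical in-kernel
 * caller starts a helper thread and drops the returned reference once the
 * handle is no longer needed.  `my_continuation' and `my_arg' are
 * placeholder names.
 *
 *	thread_t thread;
 *
 *	if (kernel_thread_start(my_continuation, my_arg, &thread) == KERN_SUCCESS) {
 *		thread_deallocate(thread);	// drop the +1 reference returned to the caller
 *	}
 */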
1748
1749 /* Separated into helper function so it can be used by THREAD_BASIC_INFO and THREAD_EXTENDED_INFO */
1750 /* it is assumed that the thread is locked by the caller */
1751 static void
1752 retrieve_thread_basic_info(thread_t thread, thread_basic_info_t basic_info)
1753 {
1754 int state, flags;
1755
1756 /* fill in info */
1757
1758 thread_read_times(thread, &basic_info->user_time,
1759 &basic_info->system_time, NULL);
1760
1761 /*
1762 * Update lazy-evaluated scheduler info because someone wants it.
1763 */
1764 if (SCHED(can_update_priority)(thread)) {
1765 SCHED(update_priority)(thread);
1766 }
1767
1768 basic_info->sleep_time = 0;
1769
1770 /*
1771 * To calculate cpu_usage, first correct for timer rate,
1772 * then for 5/8 ageing. The correction factor [3/5] is
1773 * (1/(5/8) - 1).
1774 */
1775 basic_info->cpu_usage = 0;
1776 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
1777 if (sched_tick_interval) {
1778 basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
1779 * TH_USAGE_SCALE) / sched_tick_interval);
1780 basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
1781 }
1782 #endif
1783
1784 if (basic_info->cpu_usage > TH_USAGE_SCALE) {
1785 basic_info->cpu_usage = TH_USAGE_SCALE;
1786 }
1787
1788 basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
1789 POLICY_TIMESHARE: POLICY_RR);
1790
1791 flags = 0;
1792 if (thread->options & TH_OPT_IDLE_THREAD) {
1793 flags |= TH_FLAGS_IDLE;
1794 }
1795
1796 if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
1797 flags |= TH_FLAGS_GLOBAL_FORCED_IDLE;
1798 }
1799
1800 if (!thread->kernel_stack) {
1801 flags |= TH_FLAGS_SWAPPED;
1802 }
1803
1804 state = 0;
1805 if (thread->state & TH_TERMINATE) {
1806 state = TH_STATE_HALTED;
1807 } else if (thread->state & TH_RUN) {
1808 state = TH_STATE_RUNNING;
1809 } else if (thread->state & TH_UNINT) {
1810 state = TH_STATE_UNINTERRUPTIBLE;
1811 } else if (thread->state & TH_SUSP) {
1812 state = TH_STATE_STOPPED;
1813 } else if (thread->state & TH_WAIT) {
1814 state = TH_STATE_WAITING;
1815 }
1816
1817 basic_info->run_state = state;
1818 basic_info->flags = flags;
1819
1820 basic_info->suspend_count = thread->user_stop_count;
1821
1822 return;
1823 }
1824
1825 kern_return_t
1826 thread_info_internal(
1827 thread_t thread,
1828 thread_flavor_t flavor,
1829 thread_info_t thread_info_out, /* ptr to OUT array */
1830 mach_msg_type_number_t *thread_info_count) /*IN/OUT*/
1831 {
1832 spl_t s;
1833
1834 if (thread == THREAD_NULL) {
1835 return KERN_INVALID_ARGUMENT;
1836 }
1837
1838 if (flavor == THREAD_BASIC_INFO) {
1839 if (*thread_info_count < THREAD_BASIC_INFO_COUNT) {
1840 return KERN_INVALID_ARGUMENT;
1841 }
1842
1843 s = splsched();
1844 thread_lock(thread);
1845
1846 retrieve_thread_basic_info(thread, (thread_basic_info_t) thread_info_out);
1847
1848 thread_unlock(thread);
1849 splx(s);
1850
1851 *thread_info_count = THREAD_BASIC_INFO_COUNT;
1852
1853 return KERN_SUCCESS;
1854 } else if (flavor == THREAD_IDENTIFIER_INFO) {
1855 thread_identifier_info_t identifier_info;
1856
1857 if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT) {
1858 return KERN_INVALID_ARGUMENT;
1859 }
1860
1861 identifier_info = __IGNORE_WCASTALIGN((thread_identifier_info_t)thread_info_out);
1862
1863 s = splsched();
1864 thread_lock(thread);
1865
1866 identifier_info->thread_id = thread->thread_id;
1867 identifier_info->thread_handle = thread->machine.cthread_self;
1868 identifier_info->dispatch_qaddr = thread_dispatchqaddr(thread);
1869
1870 thread_unlock(thread);
1871 splx(s);
1872 return KERN_SUCCESS;
1873 } else if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
1874 policy_timeshare_info_t ts_info;
1875
1876 if (*thread_info_count < POLICY_TIMESHARE_INFO_COUNT) {
1877 return KERN_INVALID_ARGUMENT;
1878 }
1879
1880 ts_info = (policy_timeshare_info_t)thread_info_out;
1881
1882 s = splsched();
1883 thread_lock(thread);
1884
1885 if (thread->sched_mode != TH_MODE_TIMESHARE) {
1886 thread_unlock(thread);
1887 splx(s);
1888 return KERN_INVALID_POLICY;
1889 }
1890
1891 ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
1892 if (ts_info->depressed) {
1893 ts_info->base_priority = DEPRESSPRI;
1894 ts_info->depress_priority = thread->base_pri;
1895 } else {
1896 ts_info->base_priority = thread->base_pri;
1897 ts_info->depress_priority = -1;
1898 }
1899
1900 ts_info->cur_priority = thread->sched_pri;
1901 ts_info->max_priority = thread->max_priority;
1902
1903 thread_unlock(thread);
1904 splx(s);
1905
1906 *thread_info_count = POLICY_TIMESHARE_INFO_COUNT;
1907
1908 return KERN_SUCCESS;
1909 } else if (flavor == THREAD_SCHED_FIFO_INFO) {
1910 if (*thread_info_count < POLICY_FIFO_INFO_COUNT) {
1911 return KERN_INVALID_ARGUMENT;
1912 }
1913
1914 return KERN_INVALID_POLICY;
1915 } else if (flavor == THREAD_SCHED_RR_INFO) {
1916 policy_rr_info_t rr_info;
1917 uint32_t quantum_time;
1918 uint64_t quantum_ns;
1919
1920 if (*thread_info_count < POLICY_RR_INFO_COUNT) {
1921 return KERN_INVALID_ARGUMENT;
1922 }
1923
1924 rr_info = (policy_rr_info_t) thread_info_out;
1925
1926 s = splsched();
1927 thread_lock(thread);
1928
1929 if (thread->sched_mode == TH_MODE_TIMESHARE) {
1930 thread_unlock(thread);
1931 splx(s);
1932
1933 return KERN_INVALID_POLICY;
1934 }
1935
1936 rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
1937 if (rr_info->depressed) {
1938 rr_info->base_priority = DEPRESSPRI;
1939 rr_info->depress_priority = thread->base_pri;
1940 } else {
1941 rr_info->base_priority = thread->base_pri;
1942 rr_info->depress_priority = -1;
1943 }
1944
1945 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
1946 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
1947
1948 rr_info->max_priority = thread->max_priority;
1949 rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
1950
1951 thread_unlock(thread);
1952 splx(s);
1953
1954 *thread_info_count = POLICY_RR_INFO_COUNT;
1955
1956 return KERN_SUCCESS;
1957 } else if (flavor == THREAD_EXTENDED_INFO) {
1958 thread_basic_info_data_t basic_info;
1959 thread_extended_info_t extended_info = __IGNORE_WCASTALIGN((thread_extended_info_t)thread_info_out);
1960
1961 if (*thread_info_count < THREAD_EXTENDED_INFO_COUNT) {
1962 return KERN_INVALID_ARGUMENT;
1963 }
1964
1965 s = splsched();
1966 thread_lock(thread);
1967
1968 /* NOTE: This mimics fill_taskthreadinfo(), which is the function used by proc_pidinfo() for
1969 * the PROC_PIDTHREADINFO flavor (which can't be used on corpses)
1970 */
1971 retrieve_thread_basic_info(thread, &basic_info);
1972 extended_info->pth_user_time = (((uint64_t)basic_info.user_time.seconds * NSEC_PER_SEC) + ((uint64_t)basic_info.user_time.microseconds * NSEC_PER_USEC));
1973 extended_info->pth_system_time = (((uint64_t)basic_info.system_time.seconds * NSEC_PER_SEC) + ((uint64_t)basic_info.system_time.microseconds * NSEC_PER_USEC));
1974
1975 extended_info->pth_cpu_usage = basic_info.cpu_usage;
1976 extended_info->pth_policy = basic_info.policy;
1977 extended_info->pth_run_state = basic_info.run_state;
1978 extended_info->pth_flags = basic_info.flags;
1979 extended_info->pth_sleep_time = basic_info.sleep_time;
1980 extended_info->pth_curpri = thread->sched_pri;
1981 extended_info->pth_priority = thread->base_pri;
1982 extended_info->pth_maxpriority = thread->max_priority;
1983
1984 bsd_getthreadname(thread->uthread, extended_info->pth_name);
1985
1986 thread_unlock(thread);
1987 splx(s);
1988
1989 *thread_info_count = THREAD_EXTENDED_INFO_COUNT;
1990
1991 return KERN_SUCCESS;
1992 } else if (flavor == THREAD_DEBUG_INFO_INTERNAL) {
1993 #if DEVELOPMENT || DEBUG
1994 thread_debug_info_internal_t dbg_info;
1995 if (*thread_info_count < THREAD_DEBUG_INFO_INTERNAL_COUNT) {
1996 return KERN_NOT_SUPPORTED;
1997 }
1998
1999 if (thread_info_out == NULL) {
2000 return KERN_INVALID_ARGUMENT;
2001 }
2002
2003 dbg_info = __IGNORE_WCASTALIGN((thread_debug_info_internal_t)thread_info_out);
2004 dbg_info->page_creation_count = thread->t_page_creation_count;
2005
2006 *thread_info_count = THREAD_DEBUG_INFO_INTERNAL_COUNT;
2007 return KERN_SUCCESS;
2008 #endif /* DEVELOPMENT || DEBUG */
2009 return KERN_NOT_SUPPORTED;
2010 }
2011
2012 return KERN_INVALID_ARGUMENT;
2013 }
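
/*
 * Illustrative user-space usage (a sketch, not part of this file; assumes
 * <mach/mach.h>): the thread_info() MIG routine ultimately reaches
 * thread_info_internal() above.
 *
 *	thread_basic_info_data_t info;
 *	mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
 *	mach_port_t self = mach_thread_self();
 *	kern_return_t kr = thread_info(self, THREAD_BASIC_INFO,
 *	    (thread_info_t)&info, &count);
 *	mach_port_deallocate(mach_task_self(), self);
 *	if (kr == KERN_SUCCESS) {
 *		// info.cpu_usage is scaled against TH_USAGE_SCALE
 *	}
 */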
2014
2015 void
2016 thread_read_times(
2017 thread_t thread,
2018 time_value_t *user_time,
2019 time_value_t *system_time,
2020 time_value_t *runnable_time)
2021 {
2022 clock_sec_t secs;
2023 clock_usec_t usecs;
2024 uint64_t tval_user, tval_system;
2025
2026 tval_user = timer_grab(&thread->user_timer);
2027 tval_system = timer_grab(&thread->system_timer);
2028
2029 if (thread->precise_user_kernel_time) {
2030 absolutetime_to_microtime(tval_user, &secs, &usecs);
2031 user_time->seconds = (typeof(user_time->seconds))secs;
2032 user_time->microseconds = usecs;
2033
2034 absolutetime_to_microtime(tval_system, &secs, &usecs);
2035 system_time->seconds = (typeof(system_time->seconds))secs;
2036 system_time->microseconds = usecs;
2037 } else {
2038 /* system_timer may represent either sys or user */
2039 tval_user += tval_system;
2040 absolutetime_to_microtime(tval_user, &secs, &usecs);
2041 user_time->seconds = (typeof(user_time->seconds))secs;
2042 user_time->microseconds = usecs;
2043
2044 system_time->seconds = 0;
2045 system_time->microseconds = 0;
2046 }
2047
2048 if (runnable_time) {
2049 uint64_t tval_runnable = timer_grab(&thread->runnable_timer);
2050 absolutetime_to_microtime(tval_runnable, &secs, &usecs);
2051 runnable_time->seconds = (typeof(runnable_time->seconds))secs;
2052 runnable_time->microseconds = usecs;
2053 }
2054 }
2055
2056 uint64_t
2057 thread_get_runtime_self(void)
2058 {
2059 boolean_t interrupt_state;
2060 uint64_t runtime;
2061 thread_t thread = NULL;
2062 processor_t processor = NULL;
2063
2064 thread = current_thread();
2065
2066 /* Not interrupt safe, as the scheduler may otherwise update timer values underneath us */
2067 interrupt_state = ml_set_interrupts_enabled(FALSE);
2068 processor = current_processor();
2069 timer_update(processor->thread_timer, mach_absolute_time());
2070 runtime = (timer_grab(&thread->user_timer) + timer_grab(&thread->system_timer));
2071 ml_set_interrupts_enabled(interrupt_state);
2072
2073 return runtime;
2074 }
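
/*
 * The value returned above is in mach absolute time units; a hypothetical
 * caller would convert it before reporting, e.g.:
 *
 *	uint64_t runtime_abs = thread_get_runtime_self();
 *	uint64_t runtime_ns;
 *	absolutetime_to_nanoseconds(runtime_abs, &runtime_ns);
 */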
2075
2076 kern_return_t
2077 thread_assign(
2078 __unused thread_t thread,
2079 __unused processor_set_t new_pset)
2080 {
2081 return KERN_FAILURE;
2082 }
2083
2084 /*
2085 * thread_assign_default:
2086 *
2087 * Special version of thread_assign for assigning threads to default
2088 * processor set.
2089 */
2090 kern_return_t
2091 thread_assign_default(
2092 thread_t thread)
2093 {
2094 return thread_assign(thread, &pset0);
2095 }
2096
2097 /*
2098 * thread_get_assignment
2099 *
2100 * Return current assignment for this thread.
2101 */
2102 kern_return_t
2103 thread_get_assignment(
2104 thread_t thread,
2105 processor_set_t *pset)
2106 {
2107 if (thread == NULL) {
2108 return KERN_INVALID_ARGUMENT;
2109 }
2110
2111 *pset = &pset0;
2112
2113 return KERN_SUCCESS;
2114 }
2115
2116 /*
2117 * thread_wire_internal:
2118 *
2119 * Specify that the target thread must always be able
2120 * to run and to allocate memory.
2121 */
2122 kern_return_t
2123 thread_wire_internal(
2124 host_priv_t host_priv,
2125 thread_t thread,
2126 boolean_t wired,
2127 boolean_t *prev_state)
2128 {
2129 if (host_priv == NULL || thread != current_thread()) {
2130 return KERN_INVALID_ARGUMENT;
2131 }
2132
2133 if (prev_state) {
2134 *prev_state = (thread->options & TH_OPT_VMPRIV) != 0;
2135 }
2136
2137 if (wired) {
2138 if (!(thread->options & TH_OPT_VMPRIV)) {
2139 vm_page_free_reserve(1); /* XXX */
2140 }
2141 thread->options |= TH_OPT_VMPRIV;
2142 } else {
2143 if (thread->options & TH_OPT_VMPRIV) {
2144 vm_page_free_reserve(-1); /* XXX */
2145 }
2146 thread->options &= ~TH_OPT_VMPRIV;
2147 }
2148
2149 return KERN_SUCCESS;
2150 }
2151
2152
2153 /*
2154 * thread_wire:
2155 *
2156 * User-api wrapper for thread_wire_internal()
2157 */
2158 kern_return_t
2159 thread_wire(
2160 host_priv_t host_priv,
2161 thread_t thread,
2162 boolean_t wired)
2163 {
2164 return thread_wire_internal(host_priv, thread, wired, NULL);
2165 }
2166
2167
2168 boolean_t
2169 is_vm_privileged(void)
2170 {
2171 return current_thread()->options & TH_OPT_VMPRIV ? TRUE : FALSE;
2172 }
2173
2174 boolean_t
2175 set_vm_privilege(boolean_t privileged)
2176 {
2177 boolean_t was_vmpriv;
2178
2179 if (current_thread()->options & TH_OPT_VMPRIV) {
2180 was_vmpriv = TRUE;
2181 } else {
2182 was_vmpriv = FALSE;
2183 }
2184
2185 if (privileged != FALSE) {
2186 current_thread()->options |= TH_OPT_VMPRIV;
2187 } else {
2188 current_thread()->options &= ~TH_OPT_VMPRIV;
2189 }
2190
2191 return was_vmpriv;
2192 }
2193
2194 void
2195 set_thread_rwlock_boost(void)
2196 {
2197 current_thread()->rwlock_count++;
2198 }
2199
2200 void
2201 clear_thread_rwlock_boost(void)
2202 {
2203 thread_t thread = current_thread();
2204
2205 if ((thread->rwlock_count-- == 1) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2206 lck_rw_clear_promotion(thread, 0);
2207 }
2208 }
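
/*
 * A minimal sketch of the intended pairing (hypothetical caller, purely
 * illustrative): the count is raised before a region in which the thread
 * should keep any rwlock promotion, and dropped afterwards, at which point
 * a pending TH_SFLAG_RW_PROMOTED boost is cleared.
 *
 *	set_thread_rwlock_boost();
 *	... region holding a reader/writer-style lock ...
 *	clear_thread_rwlock_boost();
 */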
2209
2210 /*
2211 * XXX assuming current thread only, for now...
2212 */
2213 void
2214 thread_guard_violation(thread_t thread,
2215 mach_exception_data_type_t code, mach_exception_data_type_t subcode, boolean_t fatal)
2216 {
2217 assert(thread == current_thread());
2218
2219 /* Don't set up the AST for kernel threads; this check is needed to ensure
2220 * that the guard_exc_* fields in the thread structure are set only by the
2221 * current thread and therefore don't require a lock.
2222 */
2223 if (thread->task == kernel_task) {
2224 return;
2225 }
2226
2227 assert(EXC_GUARD_DECODE_GUARD_TYPE(code));
2228
2229 /*
2230 * Use the saved state area of the thread structure
2231 * to store all info required to handle the AST when
2232 * returning to userspace. It's possible that there is
2233 * already a pending guard exception. If it's non-fatal,
2234 * it can only be over-written by a fatal exception code.
2235 */
2236 if (thread->guard_exc_info.code && (thread->guard_exc_fatal || !fatal)) {
2237 return;
2238 }
2239
2240 thread->guard_exc_info.code = code;
2241 thread->guard_exc_info.subcode = subcode;
2242 thread->guard_exc_fatal = fatal ? 1 : 0;
2243
2244 spl_t s = splsched();
2245 thread_ast_set(thread, AST_GUARD);
2246 ast_propagate(thread);
2247 splx(s);
2248 }
2249
2250 /*
2251 * guard_ast:
2252 *
2253 * Handle AST_GUARD for a thread. This routine looks at the
2254 * state saved in the thread structure to determine the cause
2255 * of this exception. Based on this value, it invokes the
2256 * appropriate routine which determines other exception related
2257 * info and raises the exception.
2258 */
2259 void
2260 guard_ast(thread_t t)
2261 {
2262 const mach_exception_data_type_t
2263 code = t->guard_exc_info.code,
2264 subcode = t->guard_exc_info.subcode;
2265
2266 t->guard_exc_info.code = 0;
2267 t->guard_exc_info.subcode = 0;
2268 t->guard_exc_fatal = 0;
2269
2270 switch (EXC_GUARD_DECODE_GUARD_TYPE(code)) {
2271 case GUARD_TYPE_NONE:
2272 /* lingering AST_GUARD on the processor? */
2273 break;
2274 case GUARD_TYPE_MACH_PORT:
2275 mach_port_guard_ast(t, code, subcode);
2276 break;
2277 case GUARD_TYPE_FD:
2278 fd_guard_ast(t, code, subcode);
2279 break;
2280 #if CONFIG_VNGUARD
2281 case GUARD_TYPE_VN:
2282 vn_guard_ast(t, code, subcode);
2283 break;
2284 #endif
2285 case GUARD_TYPE_VIRT_MEMORY:
2286 virt_memory_guard_ast(t, code, subcode);
2287 break;
2288 default:
2289 panic("guard_exc_info %llx %llx", code, subcode);
2290 }
2291 }
2292
2293 static void
2294 thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1)
2295 {
2296 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
2297 #if CONFIG_TELEMETRY
2298 /*
2299 * This thread is in danger of violating the CPU usage monitor. Enable telemetry
2300 * on the entire task so there are micro-stackshots available if and when
2301 * EXC_RESOURCE is triggered. We could have chosen to enable micro-stackshots
2302 * for this thread only, but now that this task is suspect, knowing what all of
2303 * its threads are up to will be useful.
2304 */
2305 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1);
2306 #endif
2307 return;
2308 }
2309
2310 #if CONFIG_TELEMETRY
2311 /*
2312 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
2313 * exceeded the limit, turn telemetry off for the task.
2314 */
2315 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0);
2316 #endif
2317
2318 if (warning == 0) {
2319 SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU();
2320 }
2321 }
2322
2323 void __attribute__((noinline))
2324 SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void)
2325 {
2326 int pid = 0;
2327 task_t task = current_task();
2328 thread_t thread = current_thread();
2329 uint64_t tid = thread->thread_id;
2330 const char *procname = "unknown";
2331 time_value_t thread_total_time = {0, 0};
2332 time_value_t thread_system_time;
2333 time_value_t thread_user_time;
2334 int action;
2335 uint8_t percentage;
2336 uint32_t usage_percent = 0;
2337 uint32_t interval_sec;
2338 uint64_t interval_ns;
2339 uint64_t balance_ns;
2340 boolean_t fatal = FALSE;
2341 boolean_t send_exc_resource = TRUE; /* in addition to RESOURCE_NOTIFY */
2342 kern_return_t kr;
2343
2344 #ifdef EXC_RESOURCE_MONITORS
2345 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
2346 #endif /* EXC_RESOURCE_MONITORS */
2347 struct ledger_entry_info lei;
2348
2349 assert(thread->t_threadledger != LEDGER_NULL);
2350
2351 /*
2352 * Extract the fatal bit and suspend the monitor (which clears the bit).
2353 */
2354 task_lock(task);
2355 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
2356 fatal = TRUE;
2357 send_exc_resource = TRUE;
2358 }
2359 /* Only one thread can be here at a time. Whichever makes it through
2360 * first will successfully suspend the monitor and proceed to send the
2361 * notification. Other threads will get an error trying to suspend the
2362 * monitor and give up on sending the notification. In the first release,
2363 * the monitor won't be resumed for a number of seconds, but we may
2364 * eventually need to handle low-latency resume.
2365 */
2366 kr = task_suspend_cpumon(task);
2367 task_unlock(task);
2368 if (kr == KERN_INVALID_ARGUMENT) {
2369 return;
2370 }
2371
2372 #ifdef MACH_BSD
2373 pid = proc_selfpid();
2374 if (task->bsd_info != NULL) {
2375 procname = proc_name_address(task->bsd_info);
2376 }
2377 #endif
2378
2379 thread_get_cpulimit(&action, &percentage, &interval_ns);
2380
2381 interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);
2382
2383 thread_read_times(thread, &thread_user_time, &thread_system_time, NULL);
2384 time_value_add(&thread_total_time, &thread_user_time);
2385 time_value_add(&thread_total_time, &thread_system_time);
2386 ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);
2387
2388 /* credit/debit/balance/limit are in absolute time units;
2389 * the refill info is in nanoseconds. */
2390 absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
2391 if (lei.lei_last_refill > 0) {
2392 usage_percent = (uint32_t)((balance_ns * 100ULL) / lei.lei_last_refill);
2393 }
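/*
 * Worked example: with a one-second refill period (lei_last_refill ==
 * NSEC_PER_SEC) and 750 ms of CPU charged against the ledger
 * (balance_ns == 750000000), usage_percent == (750000000 * 100ULL) /
 * 1000000000 == 75.
 */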
2394
2395 /* TODO: show task total runtime (via TASK_ABSOLUTETIME_INFO)? */
2396 printf("process %s[%d] thread %llu caught burning CPU! It used more than %d%% CPU over %u seconds\n",
2397 procname, pid, tid, percentage, interval_sec);
2398 printf(" (actual recent usage: %d%% over ~%llu seconds)\n",
2399 usage_percent, (lei.lei_last_refill + NSEC_PER_SEC / 2) / NSEC_PER_SEC);
2400 printf(" Thread lifetime cpu usage %d.%06ds, (%d.%06d user, %d.%06d sys)\n",
2401 thread_total_time.seconds, thread_total_time.microseconds,
2402 thread_user_time.seconds, thread_user_time.microseconds,
2403 thread_system_time.seconds, thread_system_time.microseconds);
2404 printf(" Ledger balance: %lld; mabs credit: %lld; mabs debit: %lld\n",
2405 lei.lei_balance, lei.lei_credit, lei.lei_debit);
2406 printf(" mabs limit: %llu; mabs period: %llu ns; last refill: %llu ns%s.\n",
2407 lei.lei_limit, lei.lei_refill_period, lei.lei_last_refill,
2408 (fatal ? " [fatal violation]" : ""));
2409
2410 /*
2411 * For now, send RESOURCE_NOTIFY in parallel with EXC_RESOURCE. Once
2412 * we have logging parity, we will stop sending EXC_RESOURCE (24508922).
2413 */
2414
2415 /* RESOURCE_NOTIFY MIG specifies nanoseconds of CPU time */
2416 lei.lei_balance = balance_ns;
2417 absolutetime_to_nanoseconds(lei.lei_limit, &lei.lei_limit);
2418 trace_resource_violation(RMON_CPUUSAGE_VIOLATED, &lei);
2419 kr = send_resource_violation(send_cpu_usage_violation, task, &lei,
2420 fatal ? kRNFatalLimitFlag : 0);
2421 if (kr) {
2422 printf("send_resource_violation(CPU usage, ...): error %#x\n", kr);
2423 }
2424
2425 #ifdef EXC_RESOURCE_MONITORS
2426 if (send_exc_resource) {
2427 if (disable_exc_resource) {
2428 printf("process %s[%d] thread %llu caught burning CPU! "
2429 "EXC_RESOURCE%s supressed by a boot-arg\n",
2430 procname, pid, tid, fatal ? " (and termination)" : "");
2431 return;
2432 }
2433
2434 if (audio_active) {
2435 printf("process %s[%d] thread %llu caught burning CPU! "
2436 "EXC_RESOURCE & termination supressed due to audio playback\n",
2437 procname, pid, tid);
2438 return;
2439 }
2440 }
2441
2442
2443 if (send_exc_resource) {
2444 code[0] = code[1] = 0;
2445 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU);
2446 if (fatal) {
2447 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR_FATAL);
2448 } else {
2449 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR);
2450 }
2451 EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec);
2452 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], percentage);
2453 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent);
2454 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
2455 }
2456 #endif /* EXC_RESOURCE_MONITORS */
2457
2458 if (fatal) {
2459 #if CONFIG_JETSAM
2460 jetsam_on_ledger_cpulimit_exceeded();
2461 #else
2462 task_terminate_internal(task);
2463 #endif
2464 }
2465 }
2466
2467 #if DEVELOPMENT || DEBUG
2468 void __attribute__((noinline))
2469 SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(task_t task, int thread_count)
2470 {
2471 mach_exception_data_type_t code[EXCEPTION_CODE_MAX] = {0};
2472 int pid = task_pid(task);
2473 char procname[MAXCOMLEN + 1] = "unknown";
2474
2475 if (pid == 1) {
2476 /*
2477 * Cannot suspend launchd
2478 */
2479 return;
2480 }
2481
2482 proc_name(pid, procname, sizeof(procname));
2483
2484 if (disable_exc_resource) {
2485 printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
2486 "supressed by a boot-arg. \n", procname, pid, thread_count);
2487 return;
2488 }
2489
2490 if (audio_active) {
2491 printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
2492 "supressed due to audio playback.\n", procname, pid, thread_count);
2493 return;
2494 }
2495
2496 if (exc_via_corpse_forking == 0) {
2497 printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
2498 "supressed due to corpse forking being disabled.\n", procname, pid,
2499 thread_count);
2500 return;
2501 }
2502
2503 printf("process %s[%d] crossed thread count high watermark (%d), sending "
2504 "EXC_RESOURCE\n", procname, pid, thread_count);
2505
2506 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_THREADS);
2507 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_THREADS_HIGH_WATERMARK);
2508 EXC_RESOURCE_THREADS_ENCODE_THREADS(code[0], thread_count);
2509
2510 task_enqueue_exception_with_corpse(task, EXC_RESOURCE, code, EXCEPTION_CODE_MAX, NULL);
2511 }
2512 #endif /* DEVELOPMENT || DEBUG */
2513
2514 void
2515 thread_update_io_stats(thread_t thread, int size, int io_flags)
2516 {
2517 int io_tier;
2518
2519 if (thread->thread_io_stats == NULL || thread->task->task_io_stats == NULL) {
2520 return;
2521 }
2522
2523 if (io_flags & DKIO_READ) {
2524 UPDATE_IO_STATS(thread->thread_io_stats->disk_reads, size);
2525 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->disk_reads, size);
2526 }
2527
2528 if (io_flags & DKIO_META) {
2529 UPDATE_IO_STATS(thread->thread_io_stats->metadata, size);
2530 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->metadata, size);
2531 }
2532
2533 if (io_flags & DKIO_PAGING) {
2534 UPDATE_IO_STATS(thread->thread_io_stats->paging, size);
2535 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->paging, size);
2536 }
2537
2538 io_tier = ((io_flags & DKIO_TIER_MASK) >> DKIO_TIER_SHIFT);
2539 assert(io_tier < IO_NUM_PRIORITIES);
2540
2541 UPDATE_IO_STATS(thread->thread_io_stats->io_priority[io_tier], size);
2542 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->io_priority[io_tier], size);
2543
2544 /* Update Total I/O Counts */
2545 UPDATE_IO_STATS(thread->thread_io_stats->total_io, size);
2546 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->total_io, size);
2547
2548 if (!(io_flags & DKIO_READ)) {
2549 DTRACE_IO3(physical_writes, struct task *, thread->task, uint32_t, size, int, io_flags);
2550 ledger_credit(thread->task->ledger, task_ledgers.physical_writes, size);
2551 }
2552 }
2553
2554 static void
2555 init_thread_ledgers(void)
2556 {
2557 ledger_template_t t;
2558 int idx;
2559
2560 assert(thread_ledger_template == NULL);
2561
2562 if ((t = ledger_template_create("Per-thread ledger")) == NULL) {
2563 panic("couldn't create thread ledger template");
2564 }
2565
2566 if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
2567 panic("couldn't create cpu_time entry for thread ledger template");
2568 }
2569
2570 if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) {
2571 panic("couldn't set thread ledger callback for cpu_time entry");
2572 }
2573
2574 thread_ledgers.cpu_time = idx;
2575
2576 ledger_template_complete(t);
2577 thread_ledger_template = t;
2578 }
2579
2580 /*
2581 * Returns currently applied CPU usage limit, or 0/0 if none is applied.
2582 */
2583 int
2584 thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns)
2585 {
2586 int64_t abstime = 0;
2587 uint64_t limittime = 0;
2588 thread_t thread = current_thread();
2589
2590 *percentage = 0;
2591 *interval_ns = 0;
2592 *action = 0;
2593
2594 if (thread->t_threadledger == LEDGER_NULL) {
2595 /*
2596 * This thread has no per-thread ledger, so it can't possibly
2597 * have a CPU limit applied.
2598 */
2599 return KERN_SUCCESS;
2600 }
2601
2602 ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns);
2603 ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime);
2604
2605 if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) {
2606 /*
2607 * This thread's CPU time ledger has no period or limit, so it
2608 * doesn't have a CPU limit applied.
2609 */
2610 return KERN_SUCCESS;
2611 }
2612
2613 /*
2614 * This calculation is the converse to the one in thread_set_cpulimit().
2615 */
2616 absolutetime_to_nanoseconds(abstime, &limittime);
2617 *percentage = (uint8_t)((limittime * 100ULL) / *interval_ns);
2618 assert(*percentage <= 100);
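/*
 * For example, a ledger limit equivalent to 250 ms of CPU time over a
 * one-second refill period reads back as (250000000 * 100ULL) /
 * 1000000000 == 25 percent.
 */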
2619
2620 if (thread->options & TH_OPT_PROC_CPULIMIT) {
2621 assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0);
2622
2623 *action = THREAD_CPULIMIT_BLOCK;
2624 } else if (thread->options & TH_OPT_PRVT_CPULIMIT) {
2625 assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0);
2626
2627 *action = THREAD_CPULIMIT_EXCEPTION;
2628 } else {
2629 *action = THREAD_CPULIMIT_DISABLE;
2630 }
2631
2632 return KERN_SUCCESS;
2633 }
2634
2635 /*
2636 * Set CPU usage limit on a thread.
2637 *
2638 * Calling with an action of THREAD_CPULIMIT_DISABLE removes any existing limit for this thread.
2639 */
2640 int
2641 thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
2642 {
2643 thread_t thread = current_thread();
2644 ledger_t l;
2645 uint64_t limittime = 0;
2646 uint64_t abstime = 0;
2647
2648 assert(percentage <= 100);
2649
2650 if (action == THREAD_CPULIMIT_DISABLE) {
2651 /*
2652 * Remove CPU limit, if any exists.
2653 */
2654 if (thread->t_threadledger != LEDGER_NULL) {
2655 l = thread->t_threadledger;
2656 ledger_set_limit(l, thread_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
2657 ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_IGNORE);
2658 thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT);
2659 }
2660
2661 return 0;
2662 }
2663
2664 if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) {
2665 return KERN_INVALID_ARGUMENT;
2666 }
2667
2668 l = thread->t_threadledger;
2669 if (l == LEDGER_NULL) {
2670 /*
2671 * This thread doesn't yet have a per-thread ledger, so create one with the CPU time entry active.
2672 */
2673 if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL) {
2674 return KERN_RESOURCE_SHORTAGE;
2675 }
2676
2677 /*
2678 * We are the first to create this thread's ledger, so only activate our entry.
2679 */
2680 ledger_entry_setactive(l, thread_ledgers.cpu_time);
2681 thread->t_threadledger = l;
2682 }
2683
2684 /*
2685 * The limit is specified as a percentage of CPU over an interval in nanoseconds.
2686 * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
2687 */
2688 limittime = (interval_ns * percentage) / 100;
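/*
 * E.g. percentage == 25 over interval_ns == NSEC_PER_SEC gives
 * limittime == 250000000 ns: the thread may consume 250 ms of CPU per
 * one-second refill before the ledger limit is hit.
 */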
2689 nanoseconds_to_absolutetime(limittime, &abstime);
2690 ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct);
2691 /*
2692 * Refill the thread's allotted CPU time every interval_ns nanoseconds.
2693 */
2694 ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);
2695
2696 if (action == THREAD_CPULIMIT_EXCEPTION) {
2697 /*
2698 * We don't support programming the CPU usage monitor on a task if any of its
2699 * threads have a per-thread blocking CPU limit configured.
2700 */
2701 if (thread->options & TH_OPT_PRVT_CPULIMIT) {
2702 panic("CPU usage monitor activated, but blocking thread limit exists");
2703 }
2704
2705 /*
2706 * Make a note that this thread's CPU limit is being used for the task-wide CPU
2707 * usage monitor. We don't have to arm the callback which will trigger the
2708 * exception, because that was done for us in ledger_instantiate (because the
2709 * ledger template used has a default callback).
2710 */
2711 thread->options |= TH_OPT_PROC_CPULIMIT;
2712 } else {
2713 /*
2714 * We deliberately override any CPU limit imposed by a task-wide limit (eg
2715 * CPU usage monitor).
2716 */
2717 thread->options &= ~TH_OPT_PROC_CPULIMIT;
2718
2719 thread->options |= TH_OPT_PRVT_CPULIMIT;
2720 /* The per-thread ledger template by default has a callback for CPU time */
2721 ledger_disable_callback(l, thread_ledgers.cpu_time);
2722 ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
2723 }
2724
2725 return 0;
2726 }
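
/*
 * Illustrative kernel-internal usage (the call site is hypothetical):
 * limit the calling thread to 50% of one CPU over a one-second window,
 * blocking it once the allotment is exhausted.
 *
 *	thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, 50, NSEC_PER_SEC);
 */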
2727
2728 void
2729 thread_sched_call(
2730 thread_t thread,
2731 sched_call_t call)
2732 {
2733 assert((thread->state & TH_WAIT_REPORT) == 0);
2734 thread->sched_call = call;
2735 }
2736
2737 uint64_t
2738 thread_tid(
2739 thread_t thread)
2740 {
2741 return thread != THREAD_NULL? thread->thread_id: 0;
2742 }
2743
2744 uint16_t
2745 thread_set_tag(thread_t th, uint16_t tag)
2746 {
2747 return thread_set_tag_internal(th, tag);
2748 }
2749
2750 uint16_t
2751 thread_get_tag(thread_t th)
2752 {
2753 return thread_get_tag_internal(th);
2754 }
2755
2756 uint64_t
2757 thread_last_run_time(thread_t th)
2758 {
2759 return th->last_run_time;
2760 }
2761
2762 uint64_t
2763 thread_dispatchqaddr(
2764 thread_t thread)
2765 {
2766 uint64_t dispatchqueue_addr;
2767 uint64_t thread_handle;
2768
2769 if (thread == THREAD_NULL) {
2770 return 0;
2771 }
2772
2773 thread_handle = thread->machine.cthread_self;
2774 if (thread_handle == 0) {
2775 return 0;
2776 }
2777
2778 if (thread->inspection == TRUE) {
2779 dispatchqueue_addr = thread_handle + get_task_dispatchqueue_offset(thread->task);
2780 } else if (thread->task->bsd_info) {
2781 dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
2782 } else {
2783 dispatchqueue_addr = 0;
2784 }
2785
2786 return dispatchqueue_addr;
2787 }
2788
2789 uint64_t
2790 thread_rettokern_addr(
2791 thread_t thread)
2792 {
2793 uint64_t rettokern_addr;
2794 uint64_t rettokern_offset;
2795 uint64_t thread_handle;
2796
2797 if (thread == THREAD_NULL) {
2798 return 0;
2799 }
2800
2801 thread_handle = thread->machine.cthread_self;
2802 if (thread_handle == 0) {
2803 return 0;
2804 }
2805
2806 if (thread->task->bsd_info) {
2807 rettokern_offset = get_return_to_kernel_offset_from_proc(thread->task->bsd_info);
2808
2809 /* Return 0 if return to kernel offset is not initialized. */
2810 if (rettokern_offset == 0) {
2811 rettokern_addr = 0;
2812 } else {
2813 rettokern_addr = thread_handle + rettokern_offset;
2814 }
2815 } else {
2816 rettokern_addr = 0;
2817 }
2818
2819 return rettokern_addr;
2820 }
2821
2822 /*
2823 * Export routines to other components for things that are done as macros
2824 * within the osfmk component.
2825 */
2826
2827 #undef thread_mtx_lock
2828 void thread_mtx_lock(thread_t thread);
2829 void
2830 thread_mtx_lock(thread_t thread)
2831 {
2832 lck_mtx_lock(&thread->mutex);
2833 }
2834
2835 #undef thread_mtx_unlock
2836 void thread_mtx_unlock(thread_t thread);
2837 void
2838 thread_mtx_unlock(thread_t thread)
2839 {
2840 lck_mtx_unlock(&thread->mutex);
2841 }
2842
2843 #undef thread_reference
2844 void thread_reference(thread_t thread);
2845 void
2846 thread_reference(
2847 thread_t thread)
2848 {
2849 if (thread != THREAD_NULL) {
2850 thread_reference_internal(thread);
2851 }
2852 }
2853
2854 #undef thread_should_halt
2855
2856 boolean_t
2857 thread_should_halt(
2858 thread_t th)
2859 {
2860 return thread_should_halt_fast(th);
2861 }
2862
2863 /*
2864 * thread_set_voucher_name - reset the voucher port name bound to this thread
2865 *
2866 * Conditions: nothing locked
2867 */
2868
2869 kern_return_t
2870 thread_set_voucher_name(mach_port_name_t voucher_name)
2871 {
2872 thread_t thread = current_thread();
2873 ipc_voucher_t new_voucher = IPC_VOUCHER_NULL;
2874 ipc_voucher_t voucher;
2875 ledger_t bankledger = NULL;
2876 struct thread_group *banktg = NULL;
2877 uint32_t persona_id = 0;
2878
2879 if (MACH_PORT_DEAD == voucher_name) {
2880 return KERN_INVALID_RIGHT;
2881 }
2882
2883 /*
2884 * aggressively convert to a voucher reference
2885 */
2886 if (MACH_PORT_VALID(voucher_name)) {
2887 new_voucher = convert_port_name_to_voucher(voucher_name);
2888 if (IPC_VOUCHER_NULL == new_voucher) {
2889 return KERN_INVALID_ARGUMENT;
2890 }
2891 }
2892 bank_get_bank_ledger_thread_group_and_persona(new_voucher, &bankledger, &banktg, &persona_id);
2893
2894 thread_mtx_lock(thread);
2895 voucher = thread->ith_voucher;
2896 thread->ith_voucher_name = voucher_name;
2897 thread->ith_voucher = new_voucher;
2898 thread_mtx_unlock(thread);
2899
2900 bank_swap_thread_bank_ledger(thread, bankledger);
2901 #if CONFIG_THREAD_GROUPS
2902 thread_group_set_bank(thread, banktg);
2903 #endif /* CONFIG_THREAD_GROUPS */
2904
2905 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2906 MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
2907 (uintptr_t)thread_tid(thread),
2908 (uintptr_t)voucher_name,
2909 VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
2910 persona_id, 0);
2911
2912 if (IPC_VOUCHER_NULL != voucher) {
2913 ipc_voucher_release(voucher);
2914 }
2915
2916 return KERN_SUCCESS;
2917 }
2918
2919 /*
2920 * thread_get_mach_voucher - return a voucher reference for the specified thread voucher
2921 *
2922 * Conditions: nothing locked
2923 *
2924 * NOTE: At the moment, there is no distinction between the current and effective
2925 * vouchers because we only set them at the thread level.
2926 */
2927 kern_return_t
2928 thread_get_mach_voucher(
2929 thread_act_t thread,
2930 mach_voucher_selector_t __unused which,
2931 ipc_voucher_t *voucherp)
2932 {
2933 ipc_voucher_t voucher;
2934
2935 if (THREAD_NULL == thread) {
2936 return KERN_INVALID_ARGUMENT;
2937 }
2938
2939 thread_mtx_lock(thread);
2940 voucher = thread->ith_voucher;
2941
2942 if (IPC_VOUCHER_NULL != voucher) {
2943 ipc_voucher_reference(voucher);
2944 thread_mtx_unlock(thread);
2945 *voucherp = voucher;
2946 return KERN_SUCCESS;
2947 }
2948
2949 thread_mtx_unlock(thread);
2950
2951 *voucherp = IPC_VOUCHER_NULL;
2952 return KERN_SUCCESS;
2953 }
2954
2955 /*
2956 * thread_set_mach_voucher - set a voucher reference for the specified thread voucher
2957 *
2958 * Conditions: the caller holds a reference on the voucher.
2959 * nothing locked.
2960 *
2961 * We grab another reference to the voucher and bind it to the thread.
2962 * The old voucher reference associated with the thread is
2963 * discarded.
2964 */
2965 kern_return_t
2966 thread_set_mach_voucher(
2967 thread_t thread,
2968 ipc_voucher_t voucher)
2969 {
2970 ipc_voucher_t old_voucher;
2971 ledger_t bankledger = NULL;
2972 struct thread_group *banktg = NULL;
2973 uint32_t persona_id = 0;
2974
2975 if (THREAD_NULL == thread) {
2976 return KERN_INVALID_ARGUMENT;
2977 }
2978
2979 bank_get_bank_ledger_thread_group_and_persona(voucher, &bankledger, &banktg, &persona_id);
2980
2981 thread_mtx_lock(thread);
2982 /*
2983 * Once the thread is started, we will look at `ith_voucher` without
2984 * holding any lock.
2985 *
2986 * Setting the voucher hence can only be done by current_thread() or
2987 * before it started. "started" flips under the thread mutex and must be
2988 * tested under it too.
2989 */
2990 if (thread != current_thread() && thread->started) {
2991 thread_mtx_unlock(thread);
2992 return KERN_INVALID_ARGUMENT;
2993 }
2994
2995 ipc_voucher_reference(voucher);
2996 old_voucher = thread->ith_voucher;
2997 thread->ith_voucher = voucher;
2998 thread->ith_voucher_name = MACH_PORT_NULL;
2999 thread_mtx_unlock(thread);
3000
3001 bank_swap_thread_bank_ledger(thread, bankledger);
3002 #if CONFIG_THREAD_GROUPS
3003 thread_group_set_bank(thread, banktg);
3004 #endif /* CONFIG_THREAD_GROUPS */
3005
3006 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3007 MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
3008 (uintptr_t)thread_tid(thread),
3009 (uintptr_t)MACH_PORT_NULL,
3010 VM_KERNEL_ADDRPERM((uintptr_t)voucher),
3011 persona_id, 0);
3012
3013 ipc_voucher_release(old_voucher);
3014
3015 return KERN_SUCCESS;
3016 }
3017
3018 /*
3019 * thread_swap_mach_voucher - swap a voucher reference for the specified thread voucher
3020 *
3021 * Conditions: the caller holds a reference on the new and presumed old voucher(s).
3022 * nothing locked.
3023 *
3024 * This function is no longer supported.
3025 */
3026 kern_return_t
3027 thread_swap_mach_voucher(
3028 __unused thread_t thread,
3029 __unused ipc_voucher_t new_voucher,
3030 ipc_voucher_t *in_out_old_voucher)
3031 {
3032 /*
3033 * Currently this function is only called from a MIG generated
3034 * routine which doesn't release the reference on the voucher
3035 * addressed by in_out_old_voucher. To avoid leaking this reference,
3036 * a call to release it has been added here.
3037 */
3038 ipc_voucher_release(*in_out_old_voucher);
3039 return KERN_NOT_SUPPORTED;
3040 }
3041
3042 /*
3043 * thread_get_current_voucher_origin_pid - get the pid of the originator of the current voucher.
3044 */
3045 kern_return_t
3046 thread_get_current_voucher_origin_pid(
3047 int32_t *pid)
3048 {
3049 uint32_t buf_size;
3050 kern_return_t kr;
3051 thread_t thread = current_thread();
3052
3053 buf_size = sizeof(*pid);
3054 kr = mach_voucher_attr_command(thread->ith_voucher,
3055 MACH_VOUCHER_ATTR_KEY_BANK,
3056 BANK_ORIGINATOR_PID,
3057 NULL,
3058 0,
3059 (mach_voucher_attr_content_t)pid,
3060 &buf_size);
3061
3062 return kr;
3063 }
3064
3065 #if CONFIG_THREAD_GROUPS
3066 /*
3067 * Returns the current thread's voucher-carried thread group
3068 *
3069 * Reference is borrowed from this being the current voucher, so it does NOT
3070 * return a reference to the group.
3071 */
3072 struct thread_group *
3073 thread_get_current_voucher_thread_group(thread_t thread)
3074 {
3075 assert(thread == current_thread());
3076
3077 if (thread->ith_voucher == NULL) {
3078 return NULL;
3079 }
3080
3081 ledger_t bankledger = NULL;
3082 struct thread_group *banktg = NULL;
3083
3084 bank_get_bank_ledger_thread_group_and_persona(thread->ith_voucher, &bankledger, &banktg, NULL);
3085
3086 return banktg;
3087 }
3088
3089 #endif /* CONFIG_THREAD_GROUPS */
3090
3091 boolean_t
3092 thread_has_thread_name(thread_t th)
3093 {
3094 if ((th) && (th->uthread)) {
3095 return bsd_hasthreadname(th->uthread);
3096 }
3097
3098 /*
3099 * This is an odd case; clients may set the thread name based on the lack of
3100 * a name, but in this context there is no uthread to attach the name to.
3101 */
3102 return FALSE;
3103 }
3104
3105 void
3106 thread_set_thread_name(thread_t th, const char* name)
3107 {
3108 if ((th) && (th->uthread) && name) {
3109 bsd_setthreadname(th->uthread, name);
3110 }
3111 }
3112
3113 void
3114 thread_get_thread_name(thread_t th, char* name)
3115 {
3116 if (!name) {
3117 return;
3118 }
3119 if ((th) && (th->uthread)) {
3120 bsd_getthreadname(th->uthread, name);
3121 } else {
3122 name[0] = '\0';
3123 }
3124 }
3125
3126 void
3127 thread_set_honor_qlimit(thread_t thread)
3128 {
3129 thread->options |= TH_OPT_HONOR_QLIMIT;
3130 }
3131
3132 void
3133 thread_clear_honor_qlimit(thread_t thread)
3134 {
3135 thread->options &= (~TH_OPT_HONOR_QLIMIT);
3136 }
3137
3138 /*
3139 * thread_enable_send_importance - set/clear the SEND_IMPORTANCE thread option bit.
3140 */
3141 void
3142 thread_enable_send_importance(thread_t thread, boolean_t enable)
3143 {
3144 if (enable == TRUE) {
3145 thread->options |= TH_OPT_SEND_IMPORTANCE;
3146 } else {
3147 thread->options &= ~TH_OPT_SEND_IMPORTANCE;
3148 }
3149 }
3150
3151 /*
3152 * thread_set_allocation_name - set the current thread's allocation name and return the previous one.
3153 */
3154
3155 kern_allocation_name_t
3156 thread_set_allocation_name(kern_allocation_name_t new_name)
3157 {
3158 kern_allocation_name_t ret;
3159 thread_kernel_state_t kstate = thread_get_kernel_state(current_thread());
3160 ret = kstate->allocation_name;
3161 // first name wins: only install when no name is set; passing NULL always clears
3162 if (!new_name || !kstate->allocation_name) {
3163 kstate->allocation_name = new_name;
3164 }
3165 return ret;
3166 }
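
/*
 * Typical save/restore pattern (illustrative; the caller and `my_name` are
 * hypothetical): install a name around an allocation-heavy region and put
 * the previous one back afterwards.
 *
 *	kern_allocation_name_t prior = thread_set_allocation_name(my_name);
 *	... allocations to be attributed to my_name ...
 *	thread_set_allocation_name(prior);
 */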
3167
3168 void *
3169 thread_iokit_tls_get(uint32_t index)
3170 {
3171 assert(index < THREAD_SAVE_IOKIT_TLS_COUNT);
3172 return current_thread()->saved.iokit.tls[index];
3173 }
3174
3175 void
3176 thread_iokit_tls_set(uint32_t index, void * data)
3177 {
3178 assert(index < THREAD_SAVE_IOKIT_TLS_COUNT);
3179 current_thread()->saved.iokit.tls[index] = data;
3180 }
3181
3182 uint64_t
3183 thread_get_last_wait_duration(thread_t thread)
3184 {
3185 return thread->last_made_runnable_time - thread->last_run_time;
3186 }
3187
3188 integer_t
3189 thread_kern_get_pri(thread_t thr)
3190 {
3191 return thr->base_pri;
3192 }
3193
3194 void
3195 thread_kern_set_pri(thread_t thr, integer_t pri)
3196 {
3197 sched_set_kernel_thread_priority(thr, pri);
3198 }
3199
3200 integer_t
3201 thread_kern_get_kernel_maxpri(void)
3202 {
3203 return MAXPRI_KERNEL;
3204 }
3205 /*
3206 * thread_port_with_flavor_notify
3207 *
3208 * Called whenever the Mach port system detects no-senders on
3209 * the thread inspect or read port. These ports are allocated lazily and
3210 * should be deallocated here when there are no senders remaining.
3211 */
3212 void
3213 thread_port_with_flavor_notify(mach_msg_header_t *msg)
3214 {
3215 mach_no_senders_notification_t *notification = (void *)msg;
3216 ipc_port_t port = notification->not_header.msgh_remote_port;
3217 thread_t thread;
3218 mach_thread_flavor_t flavor;
3219 ipc_kobject_type_t kotype;
3220
3221 ip_lock(port);
3222 if (port->ip_srights > 0) {
3223 ip_unlock(port);
3224 return;
3225 }
3226 thread = (thread_t)ipc_kobject_get(port);
3227 kotype = ip_kotype(port);
3228 if (thread != THREAD_NULL) {
3229 assert((IKOT_THREAD_READ == kotype) || (IKOT_THREAD_INSPECT == kotype));
3230 thread_reference_internal(thread);
3231 }
3232 ip_unlock(port);
3233
3234 if (thread == THREAD_NULL) {
3235 /* The thread is exiting or disabled; it will eventually deallocate the port */
3236 return;
3237 }
3238
3239 if (kotype == IKOT_THREAD_READ) {
3240 flavor = THREAD_FLAVOR_READ;
3241 } else {
3242 flavor = THREAD_FLAVOR_INSPECT;
3243 }
3244
3245 thread_mtx_lock(thread);
3246 ip_lock(port);
3247 /*
3248 * If the port is no longer active, then ipc_thread_terminate() ran
3249 * and destroyed the kobject already. Just deallocate the thread
3250 * ref we took and go away.
3251 *
3252 * It is also possible that several nsrequests are in flight,
3253 * only one shall NULL-out the port entry, and this is the one
3254 * that gets to dealloc the port.
3255 *
3256 * Check for a stale no-senders notification. A call to any function
3257 * that vends out send rights to this port could resurrect it between
3258 * this notification being generated and actually being handled here.
3259 */
3260 if (!ip_active(port) ||
3261 thread->ith_thread_ports[flavor] != port ||
3262 port->ip_srights > 0) {
3263 ip_unlock(port);
3264 thread_mtx_unlock(thread);
3265 thread_deallocate(thread);
3266 return;
3267 }
3268
3269 assert(thread->ith_thread_ports[flavor] == port);
3270 thread->ith_thread_ports[flavor] = IP_NULL;
3271 ipc_kobject_set_atomically(port, IKO_NULL, IKOT_NONE);
3272 ip_unlock(port);
3273 thread_mtx_unlock(thread);
3274 thread_deallocate(thread);
3275
3276 ipc_port_dealloc_kernel(port);
3277 }
3278
3279 /*
3280 * The 'thread_region_page_shift' is used by footprint
3281 * to specify the page size that it will use to
3282 * accomplish its accounting work on the task being
3283 * inspected. Since footprint uses a thread for each
3284 * task that it works on, we need to keep the page_shift
3285 * on a per-thread basis.
3286 */
3287
3288 int
3289 thread_self_region_page_shift(void)
3290 {
3291 /*
3292 * Return the page shift that this thread
3293 * would like to use for its accounting work.
3294 */
3295 return current_thread()->thread_region_page_shift;
3296 }
3297
3298 void
3299 thread_self_region_page_shift_set(
3300 int pgshift)
3301 {
3302 /*
3303 * Set the page shift that this thread
3304 * would like to use for its accounting work
3305 * when dealing with a task.
3306 */
3307 current_thread()->thread_region_page_shift = pgshift;
3308 }
3309
3310 #if CONFIG_DTRACE
3311 uint32_t
3312 dtrace_get_thread_predcache(thread_t thread)
3313 {
3314 if (thread != THREAD_NULL) {
3315 return thread->t_dtrace_predcache;
3316 } else {
3317 return 0;
3318 }
3319 }
3320
3321 int64_t
3322 dtrace_get_thread_vtime(thread_t thread)
3323 {
3324 if (thread != THREAD_NULL) {
3325 return thread->t_dtrace_vtime;
3326 } else {
3327 return 0;
3328 }
3329 }
3330
3331 int
3332 dtrace_get_thread_last_cpu_id(thread_t thread)
3333 {
3334 if ((thread != THREAD_NULL) && (thread->last_processor != PROCESSOR_NULL)) {
3335 return thread->last_processor->cpu_id;
3336 } else {
3337 return -1;
3338 }
3339 }
3340
3341 int64_t
3342 dtrace_get_thread_tracing(thread_t thread)
3343 {
3344 if (thread != THREAD_NULL) {
3345 return thread->t_dtrace_tracing;
3346 } else {
3347 return 0;
3348 }
3349 }
3350
3351 uint16_t
3352 dtrace_get_thread_inprobe(thread_t thread)
3353 {
3354 if (thread != THREAD_NULL) {
3355 return thread->t_dtrace_inprobe;
3356 } else {
3357 return 0;
3358 }
3359 }
3360
3361 vm_offset_t
3362 dtrace_get_kernel_stack(thread_t thread)
3363 {
3364 if (thread != THREAD_NULL) {
3365 return thread->kernel_stack;
3366 } else {
3367 return 0;
3368 }
3369 }
3370
3371 #if KASAN
3372 struct kasan_thread_data *
3373 kasan_get_thread_data(thread_t thread)
3374 {
3375 return &thread->kasan_data;
3376 }
3377 #endif
3378
3379 #if CONFIG_KSANCOV
3380 void **
3381 __sanitizer_get_thread_data(thread_t thread)
3382 {
3383 return &thread->ksancov_data;
3384 }
3385 #endif
3386
3387 int64_t
3388 dtrace_calc_thread_recent_vtime(thread_t thread)
3389 {
3390 if (thread != THREAD_NULL) {
3391 processor_t processor = current_processor();
3392 uint64_t abstime = mach_absolute_time();
3393 timer_t timer;
3394
3395 timer = processor->thread_timer;
3396
3397 return timer_grab(&(thread->system_timer)) + timer_grab(&(thread->user_timer)) +
3398 (abstime - timer->tstamp); /* XXX need interrupts off to prevent missed time? */
3399 } else {
3400 return 0;
3401 }
3402 }
3403
3404 void
3405 dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
3406 {
3407 if (thread != THREAD_NULL) {
3408 thread->t_dtrace_predcache = predcache;
3409 }
3410 }
3411
3412 void
3413 dtrace_set_thread_vtime(thread_t thread, int64_t vtime)
3414 {
3415 if (thread != THREAD_NULL) {
3416 thread->t_dtrace_vtime = vtime;
3417 }
3418 }
3419
3420 void
3421 dtrace_set_thread_tracing(thread_t thread, int64_t accum)
3422 {
3423 if (thread != THREAD_NULL) {
3424 thread->t_dtrace_tracing = accum;
3425 }
3426 }
3427
3428 void
3429 dtrace_set_thread_inprobe(thread_t thread, uint16_t inprobe)
3430 {
3431 if (thread != THREAD_NULL) {
3432 thread->t_dtrace_inprobe = inprobe;
3433 }
3434 }
3435
3436 vm_offset_t
3437 dtrace_set_thread_recover(thread_t thread, vm_offset_t recover)
3438 {
3439 vm_offset_t prev = 0;
3440
3441 if (thread != THREAD_NULL) {
3442 prev = thread->recover;
3443 thread->recover = recover;
3444 }
3445 return prev;
3446 }
3447
3448 vm_offset_t
3449 dtrace_sign_and_set_thread_recover(thread_t thread, vm_offset_t recover)
3450 {
3451 #if defined(HAS_APPLE_PAC)
3452 return dtrace_set_thread_recover(thread,
3453 (vm_address_t)ptrauth_sign_unauthenticated((void *)recover,
3454 ptrauth_key_function_pointer,
3455 ptrauth_blend_discriminator(&thread->recover, PAC_DISCRIMINATOR_RECOVER)));
3456 #else /* defined(HAS_APPLE_PAC) */
3457 return dtrace_set_thread_recover(thread, recover);
3458 #endif /* defined(HAS_APPLE_PAC) */
3459 }
3460
3461 void
3462 dtrace_thread_bootstrap(void)
3463 {
3464 task_t task = current_task();
3465
3466 if (task->thread_count == 1) {
3467 thread_t thread = current_thread();
3468 if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) {
3469 thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS;
3470 DTRACE_PROC(exec__success);
3471 KDBG(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXEC),
3472 task_pid(task));
3473 }
3474 DTRACE_PROC(start);
3475 }
3476 DTRACE_PROC(lwp__start);
3477 }
3478
3479 void
3480 dtrace_thread_didexec(thread_t thread)
3481 {
3482 thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS;
3483 }
3484 #endif /* CONFIG_DTRACE */