/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_FREE_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young, David Golub
 *
 *	Thread management primitives implementation.
 */
/*
 * Copyright (c) 1993 The University of Utah and
 * the Computer Systems Laboratory (CSL).  All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
 * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
 * improvements that they make and grant CSL redistribution rights.
 */
#include <mach/mach_types.h>
#include <mach/boolean.h>
#include <mach/policy.h>
#include <mach/thread_info.h>
#include <mach/thread_special_ports.h>
#include <mach/thread_status.h>
#include <mach/time_value.h>
#include <mach/vm_param.h>

#include <machine/thread.h>
#include <machine/pal_routines.h>
#include <machine/limits.h>

#include <kern/kern_types.h>
#include <kern/kalloc.h>
#include <kern/cpu_data.h>
#include <kern/counters.h>
#include <kern/extmod_statistics.h>
#include <kern/ipc_mig.h>
#include <kern/ipc_tt.h>
#include <kern/mach_param.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/sync_lock.h>
#include <kern/syscall_subr.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/thread_group.h>
#include <kern/coalition.h>
#include <kern/host.h>
#include <kern/zalloc.h>
#include <kern/assert.h>
#include <kern/exc_resource.h>
#include <kern/exc_guard.h>
#include <kern/telemetry.h>
#include <kern/policy_internal.h>
#include <kern/turnstile.h>

#include <corpses/task_corpse.h>
#if KPC
#include <kern/kpc.h>
#endif

#if MONOTONIC
#include <kern/monotonic.h>
#include <machine/monotonic.h>
#endif /* MONOTONIC */

#include <ipc/ipc_kmsg.h>
#include <ipc/ipc_port.h>
#include <bank/bank_types.h>

#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <sys/kdebug.h>
#include <sys/bsdtask_info.h>
#include <mach/sdt.h>
#include <san/kasan.h>

#include <stdatomic.h>

/*
 * Exported interfaces
 */
#include <mach/task_server.h>
#include <mach/thread_act_server.h>
#include <mach/mach_host_server.h>
#include <mach/host_priv_server.h>
#include <mach/mach_voucher_server.h>
#include <kern/policy_internal.h>
static struct zone			*thread_zone;
static lck_grp_attr_t		thread_lck_grp_attr;
lck_attr_t					thread_lck_attr;
lck_grp_t					thread_lck_grp;

struct zone					*thread_qos_override_zone;

decl_simple_lock_data(static,thread_stack_lock)
static queue_head_t		thread_stack_queue;

decl_simple_lock_data(static,thread_terminate_lock)
static queue_head_t		thread_terminate_queue;

static queue_head_t		thread_deallocate_queue;

static queue_head_t		turnstile_deallocate_queue;

static queue_head_t		crashed_threads_queue;

static queue_head_t		workq_deallocate_queue;

decl_simple_lock_data(static,thread_exception_lock)
static queue_head_t		thread_exception_queue;

struct thread_exception_elt {
	queue_chain_t		elt;
	exception_type_t	exception_type;
	task_t				exception_task;
	thread_t			exception_thread;
};

static struct thread	thread_template, init_thread;
static void thread_deallocate_enqueue(thread_t thread);
static void thread_deallocate_complete(thread_t thread);

#ifdef MACH_BSD
extern void proc_exit(void *);
extern mach_exception_data_type_t proc_encode_exit_exception_code(void *);
extern uint64_t get_dispatchqueue_offset_from_proc(void *);
extern uint64_t get_return_to_kernel_offset_from_proc(void *p);
extern int      proc_selfpid(void);
extern void     proc_name(int, char*, int);
extern char *   proc_name_address(void *p);
#endif /* MACH_BSD */

extern int disable_exc_resource;
extern int audio_active;
extern int debug_task;
int thread_max = CONFIG_THREAD_MAX;	/* Max number of threads */
int task_threadmax = CONFIG_THREAD_MAX;

static uint64_t		thread_unique_id = 100;

struct _thread_ledger_indices thread_ledgers = { -1 };
static ledger_template_t thread_ledger_template = NULL;
static void init_thread_ledgers(void);

#if CONFIG_JETSAM
void jetsam_on_ledger_cpulimit_exceeded(void);
#endif

extern int task_thread_soft_limit;
extern int exc_via_corpse_forking;

#if DEVELOPMENT || DEBUG
extern int exc_resource_threads_enabled;
#endif /* DEVELOPMENT || DEBUG */

/*
 * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry.
 *
 * (ie when any thread's CPU consumption exceeds 70% of the limit, start taking user
 *  stacktraces, aka micro-stackshots)
 */
#define	CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70

int cpumon_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void);
#if DEVELOPMENT || DEBUG
void __attribute__((noinline)) SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(task_t, int);
#endif /* DEVELOPMENT || DEBUG */

/*
 * The smallest interval over which we support limiting CPU consumption is 1ms
 */
#define MINIMUM_CPULIMIT_INTERVAL_MS 1
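/*
 * Worked example: with a per-thread ledger limit of 50 ms of CPU per second
 * (a hypothetical setting) and the default 70% trigger above, micro-stackshot
 * telemetry begins once a thread has consumed 0.70 * 50 ms = 35 ms of CPU
 * time within that one-second interval.
 */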
void
thread_bootstrap(void)
{
	/*
	 *	Fill in a template thread for fast initialization.
	 */

#if MACH_ASSERT
	thread_template.thread_magic = THREAD_MAGIC;
#endif /* MACH_ASSERT */

	thread_template.runq = PROCESSOR_NULL;

	thread_template.ref_count = 2;

	thread_template.reason = AST_NONE;
	thread_template.at_safe_point = FALSE;
	thread_template.wait_event = NO_EVENT64;
	thread_template.waitq = NULL;
	thread_template.wait_result = THREAD_WAITING;
	thread_template.options = THREAD_ABORTSAFE;
	thread_template.state = TH_WAIT | TH_UNINT;
	thread_template.wake_active = FALSE;
	thread_template.continuation = THREAD_CONTINUE_NULL;
	thread_template.parameter = NULL;

	thread_template.importance = 0;
	thread_template.sched_mode = TH_MODE_NONE;
	thread_template.sched_flags = 0;
	thread_template.saved_mode = TH_MODE_NONE;
	thread_template.safe_release = 0;
	thread_template.th_sched_bucket = TH_BUCKET_RUN;

	thread_template.sfi_class = SFI_CLASS_UNSPECIFIED;
	thread_template.sfi_wait_class = SFI_CLASS_UNSPECIFIED;

	thread_template.active = 0;
	thread_template.started = 0;
	thread_template.static_param = 0;
	thread_template.policy_reset = 0;

	thread_template.base_pri = BASEPRI_DEFAULT;
	thread_template.sched_pri = 0;
	thread_template.max_priority = 0;
	thread_template.task_priority = 0;
	thread_template.promotions = 0;
	thread_template.rwlock_count = 0;
	thread_template.waiting_for_mutex = NULL;

	thread_template.realtime.deadline = UINT64_MAX;

	thread_template.quantum_remaining = 0;
	thread_template.last_run_time = 0;
	thread_template.last_made_runnable_time = THREAD_NOT_RUNNABLE;
	thread_template.last_basepri_change_time = THREAD_NOT_RUNNABLE;
	thread_template.same_pri_latency = 0;

	thread_template.computation_metered = 0;
	thread_template.computation_epoch = 0;

#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	thread_template.sched_stamp = 0;
	thread_template.pri_shift = INT8_MAX;
	thread_template.sched_usage = 0;
	thread_template.cpu_usage = thread_template.cpu_delta = 0;
#endif
	thread_template.c_switch = thread_template.p_switch = thread_template.ps_switch = 0;

#if MONOTONIC
	memset(&thread_template.t_monotonic, 0,
	    sizeof(thread_template.t_monotonic));
#endif /* MONOTONIC */

	thread_template.bound_processor = PROCESSOR_NULL;
	thread_template.last_processor = PROCESSOR_NULL;

	thread_template.sched_call = NULL;

	timer_init(&thread_template.user_timer);
	timer_init(&thread_template.system_timer);
	timer_init(&thread_template.ptime);
	timer_init(&thread_template.runnable_timer);
	thread_template.user_timer_save = 0;
	thread_template.system_timer_save = 0;
	thread_template.vtimer_user_save = 0;
	thread_template.vtimer_prof_save = 0;
	thread_template.vtimer_rlim_save = 0;
	thread_template.vtimer_qos_save = 0;

#if CONFIG_SCHED_SFI
	thread_template.wait_sfi_begin_time = 0;
#endif

	thread_template.wait_timer_is_set = FALSE;
	thread_template.wait_timer_active = 0;

	thread_template.depress_timer_active = 0;

	thread_template.recover = (vm_offset_t)NULL;

	thread_template.map = VM_MAP_NULL;
#if DEVELOPMENT || DEBUG
	thread_template.pmap_footprint_suspended = FALSE;
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_DTRACE
	thread_template.t_dtrace_predcache = 0;
	thread_template.t_dtrace_vtime = 0;
	thread_template.t_dtrace_tracing = 0;
#endif /* CONFIG_DTRACE */

#if KPERF
	thread_template.kperf_flags = 0;
	thread_template.kperf_pet_gen = 0;
	thread_template.kperf_c_switch = 0;
	thread_template.kperf_pet_cnt = 0;
#endif

#if KPC
	thread_template.kpc_buf = NULL;
#endif

#if HYPERVISOR
	thread_template.hv_thread_target = NULL;
#endif /* HYPERVISOR */

#if (DEVELOPMENT || DEBUG)
	thread_template.t_page_creation_throttled_hard = 0;
	thread_template.t_page_creation_throttled_soft = 0;
#endif /* DEVELOPMENT || DEBUG */
	thread_template.t_page_creation_throttled = 0;
	thread_template.t_page_creation_count = 0;
	thread_template.t_page_creation_time = 0;

	thread_template.affinity_set = NULL;

	thread_template.syscalls_unix = 0;
	thread_template.syscalls_mach = 0;

	thread_template.t_ledger = LEDGER_NULL;
	thread_template.t_threadledger = LEDGER_NULL;
	thread_template.t_bankledger = LEDGER_NULL;
	thread_template.t_deduct_bank_ledger_time = 0;

	thread_template.requested_policy = (struct thread_requested_policy) {};
	thread_template.effective_policy = (struct thread_effective_policy) {};

	bzero(&thread_template.overrides, sizeof(thread_template.overrides));
	thread_template.sync_ipc_overrides = 0;

	thread_template.iotier_override = THROTTLE_LEVEL_NONE;
	thread_template.thread_io_stats = NULL;
#if CONFIG_EMBEDDED
	thread_template.taskwatch = NULL;
#endif /* CONFIG_EMBEDDED */
	thread_template.thread_callout_interrupt_wakeups = thread_template.thread_callout_platform_idle_wakeups = 0;

	thread_template.thread_timer_wakeups_bin_1 = thread_template.thread_timer_wakeups_bin_2 = 0;
	thread_template.callout_woken_from_icontext = thread_template.callout_woken_from_platform_idle = 0;

	thread_template.thread_tag = 0;

	thread_template.ith_voucher_name = MACH_PORT_NULL;
	thread_template.ith_voucher = IPC_VOUCHER_NULL;

	thread_template.th_work_interval = NULL;

	init_thread = thread_template;
	machine_set_current_thread(&init_thread);
}
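/*
 * Every field initialized above becomes the default for all threads created
 * later: thread_create_internal() stamps each new allocation with a
 * whole-struct copy,
 *
 *	*new_thread = thread_template;
 *
 * so adding a field to struct thread usually means adding its default here.
 */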
extern boolean_t allow_qos_policy_set;

void
thread_init(void)
{
	thread_zone = zinit(
			sizeof(struct thread),
			thread_max * sizeof(struct thread),
			THREAD_CHUNK * sizeof(struct thread),
			"threads");

	thread_qos_override_zone = zinit(
		sizeof(struct thread_qos_override),
		4 * thread_max * sizeof(struct thread_qos_override),
		PAGE_SIZE,
		"thread qos override");
	zone_change(thread_qos_override_zone, Z_EXPAND, TRUE);
	zone_change(thread_qos_override_zone, Z_COLLECT, TRUE);
	zone_change(thread_qos_override_zone, Z_CALLERACCT, FALSE);
	zone_change(thread_qos_override_zone, Z_NOENCRYPT, TRUE);

	lck_grp_attr_setdefault(&thread_lck_grp_attr);
	lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr);
	lck_attr_setdefault(&thread_lck_attr);

	stack_init();

	thread_policy_init();

	/*
	 *	Initialize any machine-dependent
	 *	per-thread structures necessary.
	 */
	machine_thread_init();

	if (!PE_parse_boot_argn("cpumon_ustackshots_trigger_pct", &cpumon_ustackshots_trigger_pct,
		sizeof (cpumon_ustackshots_trigger_pct))) {
		cpumon_ustackshots_trigger_pct = CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT;
	}

	PE_parse_boot_argn("-qos-policy-allow", &allow_qos_policy_set, sizeof(allow_qos_policy_set));

	init_thread_ledgers();
}
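/*
 * Example: booting with the boot-arg cpumon_ustackshots_trigger_pct=90
 * (a hypothetical value) raises the telemetry trigger from the default
 * 70% to 90% of the per-thread CPU limit before micro-stackshots begin.
 */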
boolean_t
thread_is_active(thread_t thread)
{
	return (thread->active);
}

void
thread_corpse_continue(void)
{
	thread_t thread = current_thread();

	thread_terminate_internal(thread);

	/*
	 * Handle the thread termination directly
	 * here instead of returning to userspace.
	 */
	assert(thread->active == FALSE);
	thread_ast_clear(thread, AST_APC);
	thread_apc_ast(thread);

	panic("thread_corpse_continue");
}
static void
thread_terminate_continue(void)
{
	panic("thread_terminate_continue");
	/*NOTREACHED*/
}
/*
 *	thread_terminate_self:
 */
void
thread_terminate_self(void)
{
	thread_t		thread = current_thread();
	task_t			task;
	int				threadcnt;

	pal_thread_terminate_self(thread);

	DTRACE_PROC(lwp__exit);

	thread_mtx_lock(thread);

	ipc_thread_disable(thread);

	thread_mtx_unlock(thread);

	thread_sched_call(thread, NULL);

	spl_t s = splsched();
	thread_lock(thread);

	thread_depress_abort_locked(thread);

	thread_unlock(thread);
	splx(s);

#if CONFIG_EMBEDDED
	thead_remove_taskwatch(thread);
#endif /* CONFIG_EMBEDDED */

	work_interval_thread_terminate(thread);

	thread_mtx_lock(thread);

	thread_policy_reset(thread);

	thread_mtx_unlock(thread);

	bank_swap_thread_bank_ledger(thread, NULL);

	if (kdebug_enable && bsd_hasthreadname(thread->uthread)) {
		char threadname[MAXTHREADNAMESIZE];
		bsd_getthreadname(thread->uthread, threadname);
		kernel_debug_string_simple(TRACE_STRING_THREADNAME_PREV, threadname);
	}

	task = thread->task;
	uthread_cleanup(task, thread->uthread, task->bsd_info);

	if (kdebug_enable && task->bsd_info && !task_is_exec_copy(task)) {
		/* trace out pid before we sign off */
		long	dbg_arg1 = 0;
		long	dbg_arg2 = 0;

		kdbg_trace_data(thread->task->bsd_info, &dbg_arg1, &dbg_arg2);
		KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE_PID, dbg_arg1, dbg_arg2);
	}

	/*
	 * After this subtraction, this thread should never access
	 * task->bsd_info unless it got 0 back from the hw_atomic_sub.  It
	 * could be racing with other threads to be the last thread in the
	 * process, and the last thread in the process will tear down the proc
	 * structure and zero-out task->bsd_info.
	 */
	threadcnt = hw_atomic_sub(&task->active_thread_count, 1);

	/*
	 * If we are the last thread to terminate and the task is
	 * associated with a BSD process, perform BSD process exit.
	 */
	if (threadcnt == 0 && task->bsd_info != NULL && !task_is_exec_copy(task)) {
		mach_exception_data_type_t subcode = 0;
		if (kdebug_enable) {
			/* since we're the last thread in this process, trace out the command name too */
			long args[4] = {};

			kdbg_trace_string(thread->task->bsd_info, &args[0], &args[1], &args[2], &args[3]);
			KDBG_RELEASE(TRACE_STRING_PROC_EXIT, args[0], args[1], args[2], args[3]);
		}

		/* Get the exit reason before proc_exit */
		subcode = proc_encode_exit_exception_code(task->bsd_info);
		proc_exit(task->bsd_info);
		/*
		 * if there is crash info in task
		 * then do the deliver action since this is
		 * last thread for this task.
		 */
		if (task->corpse_info) {
			task_deliver_crash_notification(task, current_thread(), EXC_RESOURCE, subcode);
		}
	}

	if (threadcnt == 0) {
		task_lock(task);
		if (task_is_a_corpse_fork(task)) {
			thread_wakeup((event_t)&task->active_thread_count);
		}
		task_unlock(task);
	}

	uthread_cred_free(thread->uthread);

	s = splsched();
	thread_lock(thread);

	/*
	 * Ensure that the depress timer is no longer enqueued,
	 * so the timer (stored in the thread) can be safely deallocated
	 *
	 * TODO: build timer_call_cancel_wait
	 */

	assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) == 0);

	uint32_t delay_us = 1;

	while (thread->depress_timer_active > 0) {
		thread_unlock(thread);
		splx(s);

		delay(delay_us++);

		if (delay_us > USEC_PER_SEC)
			panic("depress timer failed to inactivate! "
				"thread: %p depress_timer_active: %d",
				thread, thread->depress_timer_active);

		s = splsched();
		thread_lock(thread);
	}

	/*
	 *	Cancel wait timer, and wait for
	 *	concurrent expirations.
	 */
	if (thread->wait_timer_is_set) {
		thread->wait_timer_is_set = FALSE;

		if (timer_call_cancel(&thread->wait_timer))
			thread->wait_timer_active--;
	}

	delay_us = 1;

	while (thread->wait_timer_active > 0) {
		thread_unlock(thread);
		splx(s);

		delay(delay_us++);

		if (delay_us > USEC_PER_SEC)
			panic("wait timer failed to inactivate! "
				"thread: %p wait_timer_active: %d",
				thread, thread->wait_timer_active);

		s = splsched();
		thread_lock(thread);
	}

	/*
	 *	If there is a reserved stack, release it.
	 */
	if (thread->reserved_stack != 0) {
		stack_free_reserved(thread);
		thread->reserved_stack = 0;
	}

	/*
	 *	Mark thread as terminating, and block.
	 */
	thread->state |= TH_TERMINATE;
	thread_mark_wait_locked(thread, THREAD_UNINT);

	assert((thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED) == 0);
	assert((thread->sched_flags & TH_SFLAG_RW_PROMOTED) == 0);
	assert((thread->sched_flags & TH_SFLAG_EXEC_PROMOTED) == 0);
	assert((thread->sched_flags & TH_SFLAG_PROMOTED) == 0);
	assert(thread->promotions == 0);
	assert(thread->was_promoted_on_wakeup == 0);
	assert(thread->waiting_for_mutex == NULL);
	assert(thread->rwlock_count == 0);

	thread_unlock(thread);
	/* splsched() is dropped by thread_block() on our behalf */

	thread_block((thread_continue_t)thread_terminate_continue);
	/*NOTREACHED*/
}
/* Drop a thread refcount safely without triggering a zfree */
void
thread_deallocate_safe(thread_t thread)
{
	__assert_only uint32_t		th_ref_count;

	if (thread == THREAD_NULL)
		return;

	assert_thread_magic(thread);

	if (__probable(atomic_fetch_sub_explicit(&thread->ref_count, 1,
			memory_order_release) - 1 > 0)) {
		return;
	}

	th_ref_count = atomic_load_explicit(&thread->ref_count, memory_order_acquire);
	assert(th_ref_count == 0);

	/* enqueue the thread for the thread deallocate daemon to call thread_deallocate_complete */
	thread_deallocate_enqueue(thread);
}
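/*
 * The release/acquire pairing above is the standard two-step refcount
 * teardown: every decrement releases that thread's prior writes, and the
 * final acquire-load synchronizes with all of them before the object is
 * freed. A minimal sketch of the same pattern, using a hypothetical object
 * type with an embedded C11 _Atomic count:
 *
 *	struct obj { _Atomic uint32_t ref_count; };
 *
 *	void
 *	obj_release(struct obj *o)
 *	{
 *		// fetch_sub returns the value *before* the subtraction
 *		if (atomic_fetch_sub_explicit(&o->ref_count, 1,
 *				memory_order_release) - 1 > 0)
 *			return;
 *		// pairs with every releasing decrement above
 *		(void)atomic_load_explicit(&o->ref_count, memory_order_acquire);
 *		obj_free(o);	// hypothetical teardown
 *	}
 */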
void
thread_deallocate(
	thread_t			thread)
{
	__assert_only uint32_t		th_ref_count;

	if (thread == THREAD_NULL)
		return;

	assert_thread_magic(thread);

	if (__probable(atomic_fetch_sub_explicit(&thread->ref_count, 1,
			memory_order_release) - 1 > 0)) {
		return;
	}

	th_ref_count = atomic_load_explicit(&thread->ref_count, memory_order_acquire);
	assert(th_ref_count == 0);

	thread_deallocate_complete(thread);
}
static void
thread_deallocate_complete(
	thread_t			thread)
{
	task_t				task;

	assert_thread_magic(thread);

	assert(thread->ref_count == 0);

	assert(thread_owned_workloops_count(thread) == 0);

	if (!(thread->state & TH_TERMINATE2))
		panic("thread_deallocate: thread not properly terminated\n");

	assert(thread->runq == PROCESSOR_NULL);

#if KPC
	kpc_thread_destroy(thread);
#endif

	ipc_thread_terminate(thread);

	proc_thread_qos_deallocate(thread);

	task = thread->task;

#ifdef MACH_BSD
	{
		void *ut = thread->uthread;

		thread->uthread = NULL;
		uthread_zone_free(ut);
	}
#endif /* MACH_BSD */

	if (thread->t_ledger)
		ledger_dereference(thread->t_ledger);
	if (thread->t_threadledger)
		ledger_dereference(thread->t_threadledger);

	assert(thread->turnstile != TURNSTILE_NULL);
	if (thread->turnstile)
		turnstile_deallocate(thread->turnstile);

	if (IPC_VOUCHER_NULL != thread->ith_voucher)
		ipc_voucher_release(thread->ith_voucher);

	if (thread->thread_io_stats)
		kfree(thread->thread_io_stats, sizeof(struct io_stat_info));

	if (thread->kernel_stack != 0)
		stack_free(thread);

	lck_mtx_destroy(&thread->mutex, &thread_lck_grp);
	machine_thread_destroy(thread);

	task_deallocate(task);

#if MACH_ASSERT
	assert_thread_magic(thread);
	thread->thread_magic = 0;
#endif /* MACH_ASSERT */

	zfree(thread_zone, thread);
}
void
thread_starts_owning_workloop(thread_t thread)
{
	atomic_fetch_add_explicit(&thread->kqwl_owning_count, 1,
			memory_order_relaxed);
}

void
thread_ends_owning_workloop(thread_t thread)
{
	__assert_only uint32_t count;
	count = atomic_fetch_sub_explicit(&thread->kqwl_owning_count, 1,
			memory_order_relaxed);
	assert(count > 0);
}

uint32_t
thread_owned_workloops_count(thread_t thread)
{
	return atomic_load_explicit(&thread->kqwl_owning_count,
			memory_order_relaxed);
}
/*
 *	thread_inspect_deallocate:
 *
 *	Drop a thread inspection reference.
 */
void
thread_inspect_deallocate(
	thread_inspect_t		thread_inspect)
{
	return(thread_deallocate((thread_t)thread_inspect));
}
/*
 *	thread_exception_daemon:
 *
 *	Deliver EXC_{RESOURCE,GUARD} exception
 */
static void
thread_exception_daemon(void)
{
	struct thread_exception_elt *elt;
	task_t task;
	thread_t thread;
	exception_type_t etype;

	simple_lock(&thread_exception_lock);
	while ((elt = (struct thread_exception_elt *)dequeue_head(&thread_exception_queue)) != NULL) {
		simple_unlock(&thread_exception_lock);

		etype = elt->exception_type;
		task = elt->exception_task;
		thread = elt->exception_thread;
		assert_thread_magic(thread);

		kfree(elt, sizeof (*elt));

		/* wait for all the threads in the task to terminate */
		task_lock(task);
		task_wait_till_threads_terminate_locked(task);
		task_unlock(task);

		/* Consumes the task ref returned by task_generate_corpse_internal */
		task_deallocate(task);
		/* Consumes the thread ref returned by task_generate_corpse_internal */
		thread_deallocate(thread);

		/* Deliver the notification, also clears the corpse. */
		task_deliver_crash_notification(task, thread, etype, 0);

		simple_lock(&thread_exception_lock);
	}

	assert_wait((event_t)&thread_exception_queue, THREAD_UNINT);
	simple_unlock(&thread_exception_lock);

	thread_block((thread_continue_t)thread_exception_daemon);
}
/*
 *	thread_exception_enqueue:
 *
 *	Enqueue a corpse port to be delivered an EXC_{RESOURCE,GUARD}.
 */
void
thread_exception_enqueue(
	task_t		task,
	thread_t	thread,
	exception_type_t etype)
{
	assert(EXC_RESOURCE == etype || EXC_GUARD == etype);
	struct thread_exception_elt *elt = kalloc(sizeof (*elt));
	elt->exception_type = etype;
	elt->exception_task = task;
	elt->exception_thread = thread;

	simple_lock(&thread_exception_lock);
	enqueue_tail(&thread_exception_queue, (queue_entry_t)elt);
	simple_unlock(&thread_exception_lock);

	thread_wakeup((event_t)&thread_exception_queue);
}
/*
 * thread_copy_resource_info
 *
 * Copy the resource info counters from source
 * thread to destination thread.
 */
void
thread_copy_resource_info(
	thread_t dst_thread,
	thread_t src_thread)
{
	dst_thread->c_switch = src_thread->c_switch;
	dst_thread->p_switch = src_thread->p_switch;
	dst_thread->ps_switch = src_thread->ps_switch;
	dst_thread->precise_user_kernel_time = src_thread->precise_user_kernel_time;
	dst_thread->user_timer = src_thread->user_timer;
	dst_thread->user_timer_save = src_thread->user_timer_save;
	dst_thread->system_timer = src_thread->system_timer;
	dst_thread->system_timer_save = src_thread->system_timer_save;
	dst_thread->runnable_timer = src_thread->runnable_timer;
	dst_thread->vtimer_user_save = src_thread->vtimer_user_save;
	dst_thread->vtimer_prof_save = src_thread->vtimer_prof_save;
	dst_thread->vtimer_rlim_save = src_thread->vtimer_rlim_save;
	dst_thread->vtimer_qos_save = src_thread->vtimer_qos_save;
	dst_thread->syscalls_unix = src_thread->syscalls_unix;
	dst_thread->syscalls_mach = src_thread->syscalls_mach;
	ledger_rollup(dst_thread->t_threadledger, src_thread->t_threadledger);
	*dst_thread->thread_io_stats = *src_thread->thread_io_stats;
}
/*
 *	thread_terminate_daemon:
 *
 *	Perform final clean up for terminating threads.
 */
static void
thread_terminate_daemon(void)
{
	thread_t	self, thread;
	task_t		task;

	self = current_thread();
	self->options |= TH_OPT_SYSTEM_CRITICAL;

	(void)splsched();
	simple_lock(&thread_terminate_lock);

thread_terminate_start:
	while ((thread = qe_dequeue_head(&thread_terminate_queue, struct thread, runq_links)) != THREAD_NULL) {
		assert_thread_magic(thread);

		/*
		 * if marked for crash reporting, skip reaping.
		 * The corpse delivery thread will clear the bit and enqueue
		 * for reaping when done
		 */
		if (thread->inspection) {
			enqueue_tail(&crashed_threads_queue, &thread->runq_links);
			continue;
		}

		simple_unlock(&thread_terminate_lock);
		(void)spllo();

		task = thread->task;

		task_lock(task);
		task->total_user_time += timer_grab(&thread->user_timer);
		task->total_ptime += timer_grab(&thread->ptime);
		task->total_runnable_time += timer_grab(&thread->runnable_timer);
		if (thread->precise_user_kernel_time) {
			task->total_system_time += timer_grab(&thread->system_timer);
		} else {
			task->total_user_time += timer_grab(&thread->system_timer);
		}

		task->c_switch += thread->c_switch;
		task->p_switch += thread->p_switch;
		task->ps_switch += thread->ps_switch;

		task->syscalls_unix += thread->syscalls_unix;
		task->syscalls_mach += thread->syscalls_mach;

		task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
		task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
		task->task_gpu_ns += ml_gpu_stat(thread);
		task->task_energy += ml_energy_stat(thread);

#if MONOTONIC
		mt_terminate_update(task, thread);
#endif /* MONOTONIC */

		thread_update_qos_cpu_time(thread);

		queue_remove(&task->threads, thread, thread_t, task_threads);
		task->thread_count--;

		/*
		 * If the task is being halted, and there is only one thread
		 * left in the task after this one, then wakeup that thread.
		 */
		if (task->thread_count == 1 && task->halting)
			thread_wakeup((event_t)&task->halting);

		task_unlock(task);

		lck_mtx_lock(&tasks_threads_lock);
		queue_remove(&threads, thread, thread_t, threads);
		threads_count--;
		lck_mtx_unlock(&tasks_threads_lock);

		thread_deallocate(thread);

		(void)splsched();
		simple_lock(&thread_terminate_lock);
	}

	while ((thread = qe_dequeue_head(&thread_deallocate_queue, struct thread, runq_links)) != THREAD_NULL) {
		assert_thread_magic(thread);

		simple_unlock(&thread_terminate_lock);
		(void)spllo();

		thread_deallocate_complete(thread);

		(void)splsched();
		simple_lock(&thread_terminate_lock);
	}

	struct turnstile *turnstile;
	while ((turnstile = qe_dequeue_head(&turnstile_deallocate_queue, struct turnstile, ts_deallocate_link)) != TURNSTILE_NULL) {

		simple_unlock(&thread_terminate_lock);
		(void)spllo();

		turnstile_destroy(turnstile);

		(void)splsched();
		simple_lock(&thread_terminate_lock);
	}

	queue_entry_t qe;

	/*
	 * see workq_deallocate_enqueue: struct workqueue is opaque to thread.c and
	 * we just link pieces of memory here
	 */
	while ((qe = dequeue_head(&workq_deallocate_queue))) {
		simple_unlock(&thread_terminate_lock);
		(void)spllo();

		workq_destroy((struct workqueue *)qe);

		(void)splsched();
		simple_lock(&thread_terminate_lock);
	}

	/*
	 * Check if something enqueued in thread terminate/deallocate queue
	 * while processing workq deallocate queue
	 */
	if (!queue_empty(&thread_terminate_queue) ||
	    !queue_empty(&thread_deallocate_queue) ||
	    !queue_empty(&turnstile_deallocate_queue))
		goto thread_terminate_start;

	assert_wait((event_t)&thread_terminate_queue, THREAD_UNINT);
	simple_unlock(&thread_terminate_lock);
	/* splsched */

	self->options &= ~TH_OPT_SYSTEM_CRITICAL;
	thread_block((thread_continue_t)thread_terminate_daemon);
	/*NOTREACHED*/
}
/*
 *	thread_terminate_enqueue:
 *
 *	Enqueue a terminating thread for final disposition.
 *
 *	Called at splsched.
 */
void
thread_terminate_enqueue(
	thread_t		thread)
{
	KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE, thread->thread_id);

	simple_lock(&thread_terminate_lock);
	enqueue_tail(&thread_terminate_queue, &thread->runq_links);
	simple_unlock(&thread_terminate_lock);

	thread_wakeup((event_t)&thread_terminate_queue);
}
/*
 *	thread_deallocate_enqueue:
 *
 *	Enqueue a thread for final deallocation.
 */
static void
thread_deallocate_enqueue(
	thread_t		thread)
{
	spl_t s = splsched();

	simple_lock(&thread_terminate_lock);
	enqueue_tail(&thread_deallocate_queue, &thread->runq_links);
	simple_unlock(&thread_terminate_lock);

	thread_wakeup((event_t)&thread_terminate_queue);
	splx(s);
}
/*
 *	turnstile_deallocate_enqueue:
 *
 *	Enqueue a turnstile for final deallocation.
 */
void
turnstile_deallocate_enqueue(
	struct turnstile *turnstile)
{
	spl_t s = splsched();

	simple_lock(&thread_terminate_lock);
	enqueue_tail(&turnstile_deallocate_queue, &turnstile->ts_deallocate_link);
	simple_unlock(&thread_terminate_lock);

	thread_wakeup((event_t)&thread_terminate_queue);
	splx(s);
}
/*
 *	workq_deallocate_enqueue:
 *
 *	Enqueue a workqueue for final deallocation.
 */
void
workq_deallocate_enqueue(
	struct workqueue *wq)
{
	spl_t s = splsched();

	simple_lock(&thread_terminate_lock);
	/*
	 * this is just to delay a zfree(), so we link the memory with no regard
	 * for what the struct looks like.
	 */
	enqueue_tail(&workq_deallocate_queue, (queue_entry_t)wq);
	simple_unlock(&thread_terminate_lock);

	thread_wakeup((event_t)&thread_terminate_queue);
	splx(s);
}
/*
 * thread_terminate_crashed_threads:
 * walk the list of crashed threads and put back the set of threads
 * that are no longer being inspected.
 */
void
thread_terminate_crashed_threads()
{
	thread_t th_remove;
	boolean_t should_wake_terminate_queue = FALSE;
	spl_t s = splsched();

	simple_lock(&thread_terminate_lock);
	/*
	 * loop through the crashed threads queue
	 * to put any threads that are not being inspected anymore
	 * back on the terminate queue
	 */
	qe_foreach_element_safe(th_remove, &crashed_threads_queue, runq_links) {
		/* make sure current_thread is never in crashed queue */
		assert(th_remove != current_thread());

		if (th_remove->inspection == FALSE) {
			re_queue_tail(&thread_terminate_queue, &th_remove->runq_links);
			should_wake_terminate_queue = TRUE;
		}
	}

	simple_unlock(&thread_terminate_lock);
	splx(s);

	if (should_wake_terminate_queue == TRUE) {
		thread_wakeup((event_t)&thread_terminate_queue);
	}
}
/*
 *	thread_stack_daemon:
 *
 *	Perform stack allocation as required due to
 *	stack invoke failures.
 */
static void
thread_stack_daemon(void)
{
	thread_t		thread;
	spl_t			s;

	s = splsched();
	simple_lock(&thread_stack_lock);

	while ((thread = qe_dequeue_head(&thread_stack_queue, struct thread, runq_links)) != THREAD_NULL) {
		assert_thread_magic(thread);

		simple_unlock(&thread_stack_lock);
		splx(s);

		/* allocate stack with interrupts enabled so that we can call into VM */
		stack_alloc(thread);

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_END, thread_tid(thread), 0, 0, 0, 0);

		s = splsched();
		thread_lock(thread);
		thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
		thread_unlock(thread);

		simple_lock(&thread_stack_lock);
	}

	assert_wait((event_t)&thread_stack_queue, THREAD_UNINT);
	simple_unlock(&thread_stack_lock);
	splx(s);

	thread_block((thread_continue_t)thread_stack_daemon);
	/*NOTREACHED*/
}
/*
 *	thread_stack_enqueue:
 *
 *	Enqueue a thread for stack allocation.
 *
 *	Called at splsched.
 */
void
thread_stack_enqueue(
	thread_t		thread)
{
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_START, thread_tid(thread), 0, 0, 0, 0);
	assert_thread_magic(thread);

	simple_lock(&thread_stack_lock);
	enqueue_tail(&thread_stack_queue, &thread->runq_links);
	simple_unlock(&thread_stack_lock);

	thread_wakeup((event_t)&thread_stack_queue);
}
void
thread_daemon_init(void)
{
	kern_return_t	result;
	thread_t	thread = NULL;

	simple_lock_init(&thread_terminate_lock, 0);
	queue_init(&thread_terminate_queue);
	queue_init(&thread_deallocate_queue);
	queue_init(&workq_deallocate_queue);
	queue_init(&turnstile_deallocate_queue);
	queue_init(&crashed_threads_queue);

	result = kernel_thread_start_priority((thread_continue_t)thread_terminate_daemon, NULL, MINPRI_KERNEL, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_daemon_init: thread_terminate_daemon");

	thread_deallocate(thread);

	simple_lock_init(&thread_stack_lock, 0);
	queue_init(&thread_stack_queue);

	result = kernel_thread_start_priority((thread_continue_t)thread_stack_daemon, NULL, BASEPRI_PREEMPT_HIGH, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_daemon_init: thread_stack_daemon");

	thread_deallocate(thread);

	simple_lock_init(&thread_exception_lock, 0);
	queue_init(&thread_exception_queue);

	result = kernel_thread_start_priority((thread_continue_t)thread_exception_daemon, NULL, MINPRI_KERNEL, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_daemon_init: thread_exception_daemon");

	thread_deallocate(thread);
}
#define TH_OPTION_NONE		0x00
#define TH_OPTION_NOCRED	0x01
#define TH_OPTION_NOSUSP	0x02
#define TH_OPTION_WORKQ		0x04

/*
 * Create a new thread.
 * Doesn't start the thread running.
 *
 * Task and tasks_threads_lock are returned locked on success.
 */
static kern_return_t
thread_create_internal(
	task_t					parent_task,
	integer_t				priority,
	thread_continue_t		continuation,
	void					*parameter,
	int						options,
	thread_t				*out_thread)
{
	thread_t				new_thread;
	static thread_t			first_thread;

	/*
	 *	Allocate a thread and initialize static fields
	 */
	if (first_thread == THREAD_NULL)
		new_thread = first_thread = current_thread();
	else
		new_thread = (thread_t)zalloc(thread_zone);
	if (new_thread == THREAD_NULL)
		return (KERN_RESOURCE_SHORTAGE);

	if (new_thread != first_thread)
		*new_thread = thread_template;

#ifdef MACH_BSD
	new_thread->uthread = uthread_alloc(parent_task, new_thread, (options & TH_OPTION_NOCRED) != 0);
	if (new_thread->uthread == NULL) {
#if MACH_ASSERT
		new_thread->thread_magic = 0;
#endif /* MACH_ASSERT */

		zfree(thread_zone, new_thread);
		return (KERN_RESOURCE_SHORTAGE);
	}
#endif /* MACH_BSD */

	if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
#ifdef MACH_BSD
		void *ut = new_thread->uthread;

		new_thread->uthread = NULL;
		/* cred free may not be necessary */
		uthread_cleanup(parent_task, ut, parent_task->bsd_info);
		uthread_cred_free(ut);
		uthread_zone_free(ut);
#endif /* MACH_BSD */

#if MACH_ASSERT
		new_thread->thread_magic = 0;
#endif /* MACH_ASSERT */

		zfree(thread_zone, new_thread);
		return (KERN_FAILURE);
	}

	new_thread->task = parent_task;

	thread_lock_init(new_thread);
	wake_lock_init(new_thread);

	lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr);

	ipc_thread_init(new_thread);

	new_thread->continuation = continuation;
	new_thread->parameter = parameter;
	new_thread->inheritor_flags = TURNSTILE_UPDATE_FLAGS_NONE;
	priority_queue_init(&new_thread->inheritor_queue,
			PRIORITY_QUEUE_BUILTIN_MAX_HEAP);

	/* Allocate I/O Statistics structure */
	new_thread->thread_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
	assert(new_thread->thread_io_stats != NULL);
	bzero(new_thread->thread_io_stats, sizeof(struct io_stat_info));
	new_thread->sync_ipc_overrides = 0;

#if KASAN
	kasan_init_thread(&new_thread->kasan_data);
#endif

#if CONFIG_IOSCHED
	/* Clear out the I/O Scheduling info for AppleFSCompression */
	new_thread->decmp_upl = NULL;
#endif /* CONFIG_IOSCHED */

#if DEVELOPMENT || DEBUG
	task_lock(parent_task);
	uint16_t thread_limit = parent_task->task_thread_limit;
	if (exc_resource_threads_enabled &&
	    thread_limit > 0 &&
	    parent_task->thread_count >= thread_limit &&
	    !parent_task->task_has_crossed_thread_limit &&
	    !(parent_task->t_flags & TF_CORPSE)) {
		int thread_count = parent_task->thread_count;
		parent_task->task_has_crossed_thread_limit = TRUE;
		task_unlock(parent_task);
		SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(parent_task, thread_count);
	}
	else {
		task_unlock(parent_task);
	}
#endif

	lck_mtx_lock(&tasks_threads_lock);
	task_lock(parent_task);

	/*
	 * Fail thread creation if parent task is being torn down or has too many threads
	 * If the caller asked for TH_OPTION_NOSUSP, also fail if the parent task is suspended
	 */
	if (parent_task->active == 0 || parent_task->halting ||
	    (parent_task->suspend_count > 0 && (options & TH_OPTION_NOSUSP) != 0) ||
	    (parent_task->thread_count >= task_threadmax && parent_task != kernel_task)) {
		task_unlock(parent_task);
		lck_mtx_unlock(&tasks_threads_lock);

#ifdef MACH_BSD
		{
			void *ut = new_thread->uthread;

			new_thread->uthread = NULL;
			uthread_cleanup(parent_task, ut, parent_task->bsd_info);
			/* cred free may not be necessary */
			uthread_cred_free(ut);
			uthread_zone_free(ut);
		}
#endif /* MACH_BSD */
		ipc_thread_disable(new_thread);
		ipc_thread_terminate(new_thread);
		kfree(new_thread->thread_io_stats, sizeof(struct io_stat_info));
		lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp);
		machine_thread_destroy(new_thread);
		zfree(thread_zone, new_thread);
		return (KERN_FAILURE);
	}

	/* New threads inherit any default state on the task */
	machine_thread_inherit_taskwide(new_thread, parent_task);

	task_reference_internal(parent_task);

	if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
		/*
		 * This task has a per-thread CPU limit; make sure this new thread
		 * gets its limit set too, before it gets out of the kernel.
		 */
		act_set_astledger(new_thread);
	}

	/* Instantiate a thread ledger. Do not fail thread creation if ledger creation fails. */
	if ((new_thread->t_threadledger = ledger_instantiate(thread_ledger_template,
			LEDGER_CREATE_INACTIVE_ENTRIES)) != LEDGER_NULL) {

		ledger_entry_setactive(new_thread->t_threadledger, thread_ledgers.cpu_time);
	}

	new_thread->t_bankledger = LEDGER_NULL;
	new_thread->t_deduct_bank_ledger_time = 0;
	new_thread->t_deduct_bank_ledger_energy = 0;

	new_thread->t_ledger = new_thread->task->ledger;
	if (new_thread->t_ledger)
		ledger_reference(new_thread->t_ledger);

#if defined(CONFIG_SCHED_MULTIQ)
	/* Cache the task's sched_group */
	new_thread->sched_group = parent_task->sched_group;
#endif /* defined(CONFIG_SCHED_MULTIQ) */

	/* Cache the task's map */
	new_thread->map = parent_task->map;

	timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
	timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);

#if KPC
	kpc_thread_create(new_thread);
#endif

	/* Set the thread's scheduling parameters */
	new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
	new_thread->max_priority = parent_task->max_priority;
	new_thread->task_priority = parent_task->priority;

	int new_priority = (priority < 0) ? parent_task->priority : priority;
	if (new_priority > new_thread->max_priority)
		new_priority = new_thread->max_priority;
#if CONFIG_EMBEDDED
	if (new_priority < MAXPRI_THROTTLE) {
		new_priority = MAXPRI_THROTTLE;
	}
#endif /* CONFIG_EMBEDDED */

	new_thread->importance = new_priority - new_thread->task_priority;

	sched_set_thread_base_priority(new_thread, new_priority);

#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	new_thread->sched_stamp = sched_tick;
	new_thread->pri_shift = sched_pri_shifts[new_thread->th_sched_bucket];
#endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) */

#if CONFIG_EMBEDDED
	if (parent_task->max_priority <= MAXPRI_THROTTLE)
		sched_thread_mode_demote(new_thread, TH_SFLAG_THROTTLED);
#endif /* CONFIG_EMBEDDED */

	thread_policy_create(new_thread);

	/* Chain the thread onto the task's list */
	queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
	parent_task->thread_count++;

	/* So terminating threads don't need to take the task lock to decrement */
	hw_atomic_add(&parent_task->active_thread_count, 1);

	/* Protected by the tasks_threads_lock */
	new_thread->thread_id = ++thread_unique_id;

	queue_enter(&threads, new_thread, thread_t, threads);
	threads_count++;

	new_thread->active = TRUE;
	if (task_is_a_corpse_fork(parent_task)) {
		/* Set the inspection bit if the task is a corpse fork */
		new_thread->inspection = TRUE;
	} else {
		new_thread->inspection = FALSE;
	}
	new_thread->corpse_dup = FALSE;
	new_thread->turnstile = turnstile_alloc();
	*out_thread = new_thread;

	if (kdebug_enable) {
		long args[4] = {};

		kdbg_trace_data(parent_task->bsd_info, &args[1], &args[3]);

		/*
		 * Starting with 26604425, exec'ing creates a new task/thread.
		 *
		 * NEWTHREAD in the current process has two possible meanings:
		 *
		 * 1) Create a new thread for this process.
		 * 2) Create a new thread for the future process this will become in an
		 *    exec.
		 *
		 * To disambiguate these, arg3 will be set to TRUE for case #2.
		 *
		 * The value we need to find (TPF_EXEC_COPY) is stable in the case of a
		 * task exec'ing. The read of t_procflags does not take the proc_lock.
		 */
		args[2] = task_is_exec_copy(parent_task) ? 1 : 0;

		KDBG_RELEASE(TRACE_DATA_NEWTHREAD, (uintptr_t)thread_tid(new_thread),
				args[1], args[2], args[3]);

		kdbg_trace_string(parent_task->bsd_info, &args[0], &args[1],
				&args[2], &args[3]);
		KDBG_RELEASE(TRACE_STRING_NEWTHREAD, args[0], args[1], args[2],
				args[3]);
	}

	DTRACE_PROC1(lwp__create, thread_t, *out_thread);

	return (KERN_SUCCESS);
}
static kern_return_t
thread_create_internal2(
	task_t				task,
	thread_t			*new_thread,
	boolean_t			from_user,
	thread_continue_t	continuation)
{
	kern_return_t		result;
	thread_t			thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, continuation, NULL, TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	thread->user_stop_count = 1;
	thread_hold(thread);
	if (task->suspend_count > 0)
		thread_hold(thread);

	if (from_user)
		extmod_statistics_incr_thread_create(task);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}
/* No prototype, since task_server.h has the _from_user version if KERNEL_SERVER */
kern_return_t
thread_create(
	task_t				task,
	thread_t			*new_thread);

kern_return_t
thread_create(
	task_t				task,
	thread_t			*new_thread)
{
	return thread_create_internal2(task, new_thread, FALSE, (thread_continue_t)thread_bootstrap_return);
}

kern_return_t
thread_create_from_user(
	task_t				task,
	thread_t			*new_thread)
{
	return thread_create_internal2(task, new_thread, TRUE, (thread_continue_t)thread_bootstrap_return);
}

kern_return_t
thread_create_with_continuation(
	task_t				task,
	thread_t			*new_thread,
	thread_continue_t	continuation)
{
	return thread_create_internal2(task, new_thread, FALSE, continuation);
}
/*
 * Create a thread that is already started, but is waiting on an event
 */
static kern_return_t
thread_create_waiting_internal(
	task_t				task,
	thread_continue_t	continuation,
	event_t				event,
	block_hint_t		block_hint,
	int					options,
	thread_t			*new_thread)
{
	kern_return_t		result;
	thread_t			thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, continuation, NULL,
			options, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	/* note no user_stop_count or thread_hold here */

	if (task->suspend_count > 0)
		thread_hold(thread);

	thread_mtx_lock(thread);
	thread_set_pending_block_hint(thread, block_hint);
	if (options & TH_OPTION_WORKQ) {
		thread->static_param = true;
		event = workq_thread_init_and_wq_lock(task, thread);
	}
	thread_start_in_assert_wait(thread, event, THREAD_INTERRUPTIBLE);
	thread_mtx_unlock(thread);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}
kern_return_t
thread_create_waiting(
	task_t				task,
	thread_continue_t	continuation,
	event_t				event,
	thread_t			*new_thread)
{
	return thread_create_waiting_internal(task, continuation, event,
			kThreadWaitNone, TH_OPTION_NONE, new_thread);
}
static kern_return_t
thread_create_running_internal2(
	task_t					task,
	int						flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t	new_state_count,
	thread_t				*new_thread,
	boolean_t				from_user)
{
	kern_return_t	result;
	thread_t		thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1,
			(thread_continue_t)thread_bootstrap_return, NULL,
			TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	if (task->suspend_count > 0)
		thread_hold(thread);

	if (from_user) {
		result = machine_thread_state_convert_from_user(thread, flavor,
				new_state, new_state_count);
	}
	if (result == KERN_SUCCESS) {
		result = machine_thread_set_state(thread, flavor, new_state,
				new_state_count);
	}
	if (result != KERN_SUCCESS) {
		task_unlock(task);
		lck_mtx_unlock(&tasks_threads_lock);

		thread_terminate(thread);
		thread_deallocate(thread);
		return (result);
	}

	thread_mtx_lock(thread);
	thread_start(thread);
	thread_mtx_unlock(thread);

	if (from_user)
		extmod_statistics_incr_thread_create(task);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (result);
}
/* Prototype, see justification above */
kern_return_t
thread_create_running(
	task_t					task,
	int						flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t	new_state_count,
	thread_t				*new_thread);

kern_return_t
thread_create_running(
	task_t					task,
	int						flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t	new_state_count,
	thread_t				*new_thread)
{
	return thread_create_running_internal2(
			task, flavor, new_state, new_state_count,
			new_thread, FALSE);
}

kern_return_t
thread_create_running_from_user(
	task_t					task,
	int						flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t	new_state_count,
	thread_t				*new_thread)
{
	return thread_create_running_internal2(
			task, flavor, new_state, new_state_count,
			new_thread, TRUE);
}
kern_return_t
thread_create_workq_waiting(
	task_t				task,
	thread_continue_t	continuation,
	thread_t			*new_thread)
{
	int options = TH_OPTION_NOCRED | TH_OPTION_NOSUSP | TH_OPTION_WORKQ;
	return thread_create_waiting_internal(task, continuation, NULL,
			kThreadWaitParkedWorkQueue, options, new_thread);
}
/*
 *	kernel_thread_create:
 *
 *	Create a thread in the kernel task
 *	to execute in kernel context.
 */
kern_return_t
kernel_thread_create(
	thread_continue_t	continuation,
	void				*parameter,
	integer_t			priority,
	thread_t			*new_thread)
{
	kern_return_t		result;
	thread_t			thread;
	task_t				task = kernel_task;

	result = thread_create_internal(task, priority, continuation, parameter,
			TH_OPTION_NOCRED | TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	stack_alloc(thread);
	assert(thread->kernel_stack != 0);
#if CONFIG_EMBEDDED
	if (priority > BASEPRI_KERNEL)
#endif
	thread->reserved_stack = thread->kernel_stack;

	if (debug_task & 1)
		kprintf("kernel_thread_create: thread = %p continuation = %p\n", thread, continuation);
	*new_thread = thread;

	return (result);
}
kern_return_t
kernel_thread_start_priority(
	thread_continue_t	continuation,
	void				*parameter,
	integer_t			priority,
	thread_t			*new_thread)
{
	kern_return_t	result;
	thread_t		thread;

	result = kernel_thread_create(continuation, parameter, priority, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	*new_thread = thread;

	thread_mtx_lock(thread);
	thread_start(thread);
	thread_mtx_unlock(thread);

	return (result);
}

kern_return_t
kernel_thread_start(
	thread_continue_t	continuation,
	void				*parameter,
	thread_t			*new_thread)
{
	return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
}
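/*
 * Usage sketch (illustrative): kernel_thread_start() hands back a reference
 * on the new thread; callers that do not keep the thread_t around drop it
 * immediately, exactly as thread_daemon_init() does above:
 *
 *	thread_t thread;
 *	if (kernel_thread_start(my_continuation, NULL, &thread) == KERN_SUCCESS)
 *		thread_deallocate(thread);	// the thread itself keeps running
 *
 * my_continuation is a hypothetical thread_continue_t.
 */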
/* Separated into helper function so it can be used by THREAD_BASIC_INFO and THREAD_EXTENDED_INFO */
/* it is assumed that the thread is locked by the caller */
static void
retrieve_thread_basic_info(thread_t thread, thread_basic_info_t basic_info)
{
	int	state, flags;

	/* fill in info */

	thread_read_times(thread, &basic_info->user_time,
			&basic_info->system_time, NULL);

	/*
	 *	Update lazy-evaluated scheduler info because someone wants it.
	 */
	if (SCHED(can_update_priority)(thread))
		SCHED(update_priority)(thread);

	basic_info->sleep_time = 0;

	/*
	 *	To calculate cpu_usage, first correct for timer rate,
	 *	then for 5/8 ageing.  The correction factor [3/5] is
	 *	(1/(5/8) - 1).
	 */
	basic_info->cpu_usage = 0;
#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	if (sched_tick_interval) {
		basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
				* TH_USAGE_SCALE) / sched_tick_interval);
		basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
	}
#endif

	if (basic_info->cpu_usage > TH_USAGE_SCALE)
		basic_info->cpu_usage = TH_USAGE_SCALE;
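	/*
	 * Numeric example (illustrative): if a thread's aged cpu_usage equals
	 * half of sched_tick_interval, the first step above yields
	 * TH_USAGE_SCALE / 2, and the 3/5 ageing correction reduces that to
	 * (TH_USAGE_SCALE / 2) * 3 / 5 = 0.3 * TH_USAGE_SCALE, i.e. 30% of
	 * full scale before the clamp.
	 */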
	basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
			POLICY_TIMESHARE: POLICY_RR);

	flags = 0;
	if (thread->options & TH_OPT_IDLE_THREAD)
		flags |= TH_FLAGS_IDLE;

	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
		flags |= TH_FLAGS_GLOBAL_FORCED_IDLE;
	}

	if (!thread->kernel_stack)
		flags |= TH_FLAGS_SWAPPED;

	state = 0;
	if (thread->state & TH_TERMINATE)
		state = TH_STATE_HALTED;
	else
	if (thread->state & TH_RUN)
		state = TH_STATE_RUNNING;
	else
	if (thread->state & TH_UNINT)
		state = TH_STATE_UNINTERRUPTIBLE;
	else
	if (thread->state & TH_SUSP)
		state = TH_STATE_STOPPED;
	else
	if (thread->state & TH_WAIT)
		state = TH_STATE_WAITING;

	basic_info->run_state = state;
	basic_info->flags = flags;

	basic_info->suspend_count = thread->user_stop_count;

	return;
}
kern_return_t
thread_info_internal(
	thread_t				thread,
	thread_flavor_t			flavor,
	thread_info_t			thread_info_out,	/* ptr to OUT array */
	mach_msg_type_number_t	*thread_info_count)	/*IN/OUT*/
{
	spl_t	s;

	if (thread == THREAD_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (flavor == THREAD_BASIC_INFO) {

		if (*thread_info_count < THREAD_BASIC_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		s = splsched();
		thread_lock(thread);

		retrieve_thread_basic_info(thread, (thread_basic_info_t) thread_info_out);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = THREAD_BASIC_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_IDENTIFIER_INFO) {
		thread_identifier_info_t	identifier_info;

		if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		identifier_info = (thread_identifier_info_t) thread_info_out;

		s = splsched();
		thread_lock(thread);

		identifier_info->thread_id = thread->thread_id;
		identifier_info->thread_handle = thread->machine.cthread_self;
		identifier_info->dispatch_qaddr = thread_dispatchqaddr(thread);

		thread_unlock(thread);
		splx(s);
		return KERN_SUCCESS;
	}
	else
	if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
		policy_timeshare_info_t		ts_info;

		if (*thread_info_count < POLICY_TIMESHARE_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		ts_info = (policy_timeshare_info_t)thread_info_out;

		s = splsched();
		thread_lock(thread);

		if (thread->sched_mode != TH_MODE_TIMESHARE) {
			thread_unlock(thread);
			splx(s);
			return (KERN_INVALID_POLICY);
		}

		ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
		if (ts_info->depressed) {
			ts_info->base_priority = DEPRESSPRI;
			ts_info->depress_priority = thread->base_pri;
		}
		else {
			ts_info->base_priority = thread->base_pri;
			ts_info->depress_priority = -1;
		}

		ts_info->cur_priority = thread->sched_pri;
		ts_info->max_priority = thread->max_priority;

		thread_unlock(thread);
		splx(s);

		*thread_info_count = POLICY_TIMESHARE_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_SCHED_FIFO_INFO) {
		if (*thread_info_count < POLICY_FIFO_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		return (KERN_INVALID_POLICY);
	}
	else
	if (flavor == THREAD_SCHED_RR_INFO) {
		policy_rr_info_t	rr_info;
		uint32_t quantum_time;
		uint64_t quantum_ns;

		if (*thread_info_count < POLICY_RR_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		rr_info = (policy_rr_info_t) thread_info_out;

		s = splsched();
		thread_lock(thread);

		if (thread->sched_mode == TH_MODE_TIMESHARE) {
			thread_unlock(thread);
			splx(s);

			return (KERN_INVALID_POLICY);
		}

		rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
		if (rr_info->depressed) {
			rr_info->base_priority = DEPRESSPRI;
			rr_info->depress_priority = thread->base_pri;
		}
		else {
			rr_info->base_priority = thread->base_pri;
			rr_info->depress_priority = -1;
		}

		quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
		absolutetime_to_nanoseconds(quantum_time, &quantum_ns);

		rr_info->max_priority = thread->max_priority;
		rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = POLICY_RR_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_EXTENDED_INFO) {
		thread_basic_info_data_t	basic_info;
		thread_extended_info_t		extended_info = (thread_extended_info_t) thread_info_out;

		if (*thread_info_count < THREAD_EXTENDED_INFO_COUNT) {
			return (KERN_INVALID_ARGUMENT);
		}

		s = splsched();
		thread_lock(thread);

		/* NOTE: This mimics fill_taskthreadinfo(), which is the function used by proc_pidinfo() for
		 * the PROC_PIDTHREADINFO flavor (which can't be used on corpses)
		 */
		retrieve_thread_basic_info(thread, &basic_info);
		extended_info->pth_user_time = ((basic_info.user_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.user_time.microseconds * (integer_t)NSEC_PER_USEC));
		extended_info->pth_system_time = ((basic_info.system_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.system_time.microseconds * (integer_t)NSEC_PER_USEC));

		extended_info->pth_cpu_usage = basic_info.cpu_usage;
		extended_info->pth_policy = basic_info.policy;
		extended_info->pth_run_state = basic_info.run_state;
		extended_info->pth_flags = basic_info.flags;
		extended_info->pth_sleep_time = basic_info.sleep_time;
		extended_info->pth_curpri = thread->sched_pri;
		extended_info->pth_priority = thread->base_pri;
		extended_info->pth_maxpriority = thread->max_priority;

		bsd_getthreadname(thread->uthread, extended_info->pth_name);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = THREAD_EXTENDED_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_DEBUG_INFO_INTERNAL) {
#if DEVELOPMENT || DEBUG
		thread_debug_info_internal_t dbg_info;
		if (*thread_info_count < THREAD_DEBUG_INFO_INTERNAL_COUNT)
			return (KERN_NOT_SUPPORTED);

		if (thread_info_out == NULL)
			return (KERN_INVALID_ARGUMENT);

		dbg_info = (thread_debug_info_internal_t) thread_info_out;
		dbg_info->page_creation_count = thread->t_page_creation_count;

		*thread_info_count = THREAD_DEBUG_INFO_INTERNAL_COUNT;
		return (KERN_SUCCESS);
#endif /* DEVELOPMENT || DEBUG */
		return (KERN_NOT_SUPPORTED);
	}

	return (KERN_INVALID_ARGUMENT);
}
	time_value_t	*user_time,
	time_value_t	*system_time,
	time_value_t	*runnable_time)
{
	clock_sec_t	secs;
	clock_usec_t	usecs;
	uint64_t	tval_user, tval_system;

	tval_user = timer_grab(&thread->user_timer);
	tval_system = timer_grab(&thread->system_timer);

	if (thread->precise_user_kernel_time) {
		absolutetime_to_microtime(tval_user, &secs, &usecs);
		user_time->seconds = (typeof(user_time->seconds))secs;
		user_time->microseconds = usecs;

		absolutetime_to_microtime(tval_system, &secs, &usecs);
		system_time->seconds = (typeof(system_time->seconds))secs;
		system_time->microseconds = usecs;
	} else {
		/* system_timer may represent either sys or user */
		tval_user += tval_system;
		absolutetime_to_microtime(tval_user, &secs, &usecs);
		user_time->seconds = (typeof(user_time->seconds))secs;
		user_time->microseconds = usecs;

		system_time->seconds = 0;
		system_time->microseconds = 0;
	}

	if (runnable_time) {
		uint64_t tval_runnable = timer_grab(&thread->runnable_timer);
		absolutetime_to_microtime(tval_runnable, &secs, &usecs);
		runnable_time->seconds = (typeof(runnable_time->seconds))secs;
		runnable_time->microseconds = usecs;
	}
}
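/*
 * Illustrative note (not in the original source): on configurations where
 * precise_user_kernel_time is false, user and system time accumulate into a
 * single timer, so the combined total above is reported as user time and
 * system_time comes back as 0/0.
 */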
uint64_t	thread_get_runtime_self(void)
{
	boolean_t	interrupt_state;
	uint64_t	runtime;
	thread_t	thread = NULL;
	processor_t	processor = NULL;

	thread = current_thread();

	/* Not interrupt safe, as the scheduler may otherwise update timer values underneath us */
	interrupt_state = ml_set_interrupts_enabled(FALSE);
	processor = current_processor();
	timer_update(PROCESSOR_DATA(processor, thread_timer), mach_absolute_time());
	runtime = (timer_grab(&thread->user_timer) + timer_grab(&thread->system_timer));
	ml_set_interrupts_enabled(interrupt_state);

	return runtime;
}
kern_return_t
thread_assign(
	__unused thread_t		thread,
	__unused processor_set_t	new_pset)
{
	return (KERN_FAILURE);
}

/*
 *	thread_assign_default:
 *
 *	Special version of thread_assign for assigning threads to default
 *	processor set.
 */
kern_return_t
thread_assign_default(
	thread_t	thread)
{
	return (thread_assign(thread, &pset0));
}

/*
 *	thread_get_assignment
 *
 *	Return current assignment for this thread.
 */
kern_return_t
thread_get_assignment(
	thread_t	thread,
	processor_set_t	*pset)
{
	if (thread == NULL)
		return (KERN_INVALID_ARGUMENT);

	*pset = &pset0;

	return (KERN_SUCCESS);
}
/*
 *	thread_wire_internal:
 *
 *	Specify that the target thread must always be able
 *	to run and to allocate memory.
 */
kern_return_t
thread_wire_internal(
	host_priv_t	host_priv,
	thread_t	thread,
	boolean_t	wired,
	boolean_t	*prev_state)
{
	if (host_priv == NULL || thread != current_thread())
		return (KERN_INVALID_ARGUMENT);

	assert(host_priv == &realhost);

	if (prev_state)
		*prev_state = (thread->options & TH_OPT_VMPRIV) != 0;

	if (wired) {
		if (!(thread->options & TH_OPT_VMPRIV))
			vm_page_free_reserve(1);	/* XXX */
		thread->options |= TH_OPT_VMPRIV;
	} else {
		if (thread->options & TH_OPT_VMPRIV)
			vm_page_free_reserve(-1);	/* XXX */
		thread->options &= ~TH_OPT_VMPRIV;
	}

	return (KERN_SUCCESS);
}
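/*
 * Illustrative usage (not in the original source): a caller that needs the
 * current thread temporarily wired can save and restore the prior state:
 *
 *	boolean_t prev;
 *	thread_wire_internal(host_priv, current_thread(), TRUE, &prev);
 *	... allocation-critical work ...
 *	thread_wire_internal(host_priv, current_thread(), prev, NULL);
 */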
/*
 *	thread_wire:
 *
 *	User-api wrapper for thread_wire_internal()
 */
kern_return_t
thread_wire(
	host_priv_t	host_priv,
	thread_t	thread,
	boolean_t	wired)
{
	return (thread_wire_internal(host_priv, thread, wired, NULL));
}

boolean_t
is_vm_privileged(void)
{
	return current_thread()->options & TH_OPT_VMPRIV ? TRUE : FALSE;
}
boolean_t
set_vm_privilege(boolean_t privileged)
{
	boolean_t	was_vmpriv;

	if (current_thread()->options & TH_OPT_VMPRIV)
		was_vmpriv = TRUE;
	else
		was_vmpriv = FALSE;

	if (privileged != FALSE)
		current_thread()->options |= TH_OPT_VMPRIV;
	else
		current_thread()->options &= ~TH_OPT_VMPRIV;

	return (was_vmpriv);
}
void
set_thread_rwlock_boost(void)
{
	current_thread()->rwlock_count++;
}

void
clear_thread_rwlock_boost(void)
{
	thread_t thread = current_thread();

	if ((thread->rwlock_count-- == 1) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		lck_rw_clear_promotion(thread, 0);
	}
}
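/*
 * Illustrative usage (not in the original source): the boost calls nest, e.g.
 *
 *	set_thread_rwlock_boost();
 *	... take and drop rw locks ...
 *	clear_thread_rwlock_boost();
 *
 * Because the "== 1" test above reads the pre-decrement value, only the call
 * that drops rwlock_count back to zero clears a pending RW promotion.
 */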
/*
 * XXX assuming current thread only, for now...
 */
void
thread_guard_violation(thread_t thread,
    mach_exception_data_type_t code, mach_exception_data_type_t subcode)
{
	assert(thread == current_thread());

	/* don't set up the AST for kernel threads */
	if (thread->task == kernel_task)
		return;

	spl_t s = splsched();
	/*
	 * Use the saved state area of the thread structure
	 * to store all info required to handle the AST when
	 * returning to userspace.
	 */
	assert(EXC_GUARD_DECODE_GUARD_TYPE(code));
	thread->guard_exc_info.code = code;
	thread->guard_exc_info.subcode = subcode;
	thread_ast_set(thread, AST_GUARD);
	ast_propagate(thread);
	splx(s);
}
/*
 *	guard_ast:
 *
 *	Handle AST_GUARD for a thread. This routine looks at the
 *	state saved in the thread structure to determine the cause
 *	of this exception. Based on this value, it invokes the
 *	appropriate routine which determines other exception related
 *	info and raises the exception.
 */
void
guard_ast(thread_t t)
{
	const mach_exception_data_type_t
		code = t->guard_exc_info.code,
		subcode = t->guard_exc_info.subcode;

	t->guard_exc_info.code = 0;
	t->guard_exc_info.subcode = 0;

	switch (EXC_GUARD_DECODE_GUARD_TYPE(code)) {
	case GUARD_TYPE_NONE:
		/* lingering AST_GUARD on the processor? */
		break;
	case GUARD_TYPE_MACH_PORT:
		mach_port_guard_ast(t, code, subcode);
		break;
	case GUARD_TYPE_FD:
		fd_guard_ast(t, code, subcode);
		break;
#if CONFIG_VNGUARD
	case GUARD_TYPE_VN:
		vn_guard_ast(t, code, subcode);
		break;
#endif
	case GUARD_TYPE_VIRT_MEMORY:
		virt_memory_guard_ast(t, code, subcode);
		break;
	default:
		panic("guard_exc_info %llx %llx", code, subcode);
	}
}
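/*
 * Illustrative note (not in the original source): the guard type consumed by
 * the switch above lives in the topmost bits of the 64-bit `code`;
 * EXC_GUARD_DECODE_GUARD_TYPE() extracts just that field, while the remaining
 * bits of `code` and all of `subcode` carry type-specific detail (for
 * example, the offending mach port name for GUARD_TYPE_MACH_PORT).
 */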
static void
thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1)
{
	if (warning == LEDGER_WARNING_ROSE_ABOVE) {
#if CONFIG_TELEMETRY
		/*
		 * This thread is in danger of violating the CPU usage monitor. Enable telemetry
		 * on the entire task so there are micro-stackshots available if and when
		 * EXC_RESOURCE is triggered. We could have chosen to enable micro-stackshots
		 * for this thread only; but now that this task is suspect, knowing what all of
		 * its threads are up to will be useful.
		 */
		telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1);
#endif
		return;
	}

#if CONFIG_TELEMETRY
	/*
	 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
	 * exceeded the limit, turn telemetry off for the task.
	 */
	telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0);
#endif

	if (warning == 0) {
		SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU();
	}
}
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void)
{
	int		pid = 0;
	task_t		task = current_task();
	thread_t	thread = current_thread();
	uint64_t	tid = thread->thread_id;
	const char	*procname = "unknown";
	time_value_t	thread_total_time = {0, 0};
	time_value_t	thread_system_time;
	time_value_t	thread_user_time;
	int		action;
	uint8_t		percentage;
	uint32_t	usage_percent = 0;
	uint32_t	interval_sec;
	uint64_t	interval_ns;
	uint64_t	balance_ns;
	boolean_t	fatal = FALSE;
	boolean_t	send_exc_resource = TRUE; /* in addition to RESOURCE_NOTIFY */
	kern_return_t	kr;

#ifdef EXC_RESOURCE_MONITORS
	mach_exception_data_type_t	code[EXCEPTION_CODE_MAX];
#endif /* EXC_RESOURCE_MONITORS */
	struct ledger_entry_info	lei;

	assert(thread->t_threadledger != LEDGER_NULL);

	/*
	 * Extract the fatal bit and suspend the monitor (which clears the bit).
	 */
	task_lock(task);
	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
		fatal = TRUE;
		send_exc_resource = TRUE;
	}
	/* Only one thread can be here at a time.  Whichever makes it through
	   first will successfully suspend the monitor and proceed to send the
	   notification.  Other threads will get an error trying to suspend the
	   monitor and give up on sending the notification.  In the first release,
	   the monitor won't be resumed for a number of seconds, but we may
	   eventually need to handle low-latency resume.
	*/
	kr = task_suspend_cpumon(task);
	task_unlock(task);
	if (kr == KERN_INVALID_ARGUMENT) return;

	/* Get the process name and per-thread CPU accounting. */
	pid = proc_selfpid();
	if (task->bsd_info != NULL) {
		procname = proc_name_address(task->bsd_info);
	}

	thread_get_cpulimit(&action, &percentage, &interval_ns);

	interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);

	thread_read_times(thread, &thread_user_time, &thread_system_time, NULL);
	time_value_add(&thread_total_time, &thread_user_time);
	time_value_add(&thread_total_time, &thread_system_time);
	ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);

	/* credit/debit/balance/limit are in absolute time units;
	   the refill info is in nanoseconds. */
	absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
	if (lei.lei_last_refill > 0) {
		usage_percent = (uint32_t)((balance_ns * 100ULL) / lei.lei_last_refill);
	}
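	/*
	 * Worked example (illustrative, not in the original source): a ledger
	 * balance of 450ms of CPU consumed (450,000,000 ns) against a last
	 * refill period of 1s yields
	 * usage_percent = (450000000 * 100) / 1000000000 = 45.
	 */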
	/* TODO: show task total runtime (via TASK_ABSOLUTETIME_INFO)? */
	printf("process %s[%d] thread %llu caught burning CPU! "
	    "It used more than %d%% CPU over %u seconds "
	    "(actual recent usage: %d%% over ~%llu seconds). "
	    "Thread lifetime cpu usage %d.%06ds, (%d.%06d user, %d.%06d sys) "
	    "ledger balance: %lld mabs credit: %lld mabs debit: %lld mabs "
	    "limit: %llu mabs period: %llu ns last refill: %llu ns%s.\n",
	    procname, pid, tid,
	    percentage, interval_sec,
	    usage_percent,
	    (lei.lei_last_refill + NSEC_PER_SEC / 2) / NSEC_PER_SEC,
	    thread_total_time.seconds, thread_total_time.microseconds,
	    thread_user_time.seconds, thread_user_time.microseconds,
	    thread_system_time.seconds, thread_system_time.microseconds,
	    lei.lei_balance, lei.lei_credit, lei.lei_debit,
	    lei.lei_limit, lei.lei_refill_period, lei.lei_last_refill,
	    (fatal ? " [fatal violation]" : ""));

	/*
	   For now, send RESOURCE_NOTIFY in parallel with EXC_RESOURCE.  Once
	   we have logging parity, we will stop sending EXC_RESOURCE (24508922).
	*/

	/* RESOURCE_NOTIFY MIG specifies nanoseconds of CPU time */
	lei.lei_balance = balance_ns;
	absolutetime_to_nanoseconds(lei.lei_limit, &lei.lei_limit);
	trace_resource_violation(RMON_CPUUSAGE_VIOLATED, &lei);
	kr = send_resource_violation(send_cpu_usage_violation, task, &lei,
	    fatal ? kRNFatalLimitFlag : 0);
	if (kr) {
		printf("send_resource_violation(CPU usage, ...): error %#x\n", kr);
	}
#ifdef EXC_RESOURCE_MONITORS
	if (send_exc_resource) {
		if (disable_exc_resource) {
			printf("process %s[%d] thread %llu caught burning CPU! "
			    "EXC_RESOURCE%s suppressed by a boot-arg\n",
			    procname, pid, tid, fatal ? " (and termination)" : "");
			return;
		}

		if (audio_active) {
			printf("process %s[%d] thread %llu caught burning CPU! "
			    "EXC_RESOURCE & termination suppressed due to audio playback\n",
			    procname, pid, tid);
			return;
		}
	}

	if (send_exc_resource) {
		code[0] = code[1] = 0;
		EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU);
		if (fatal) {
			EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR_FATAL);
		} else {
			EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR);
		}
		EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec);
		EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], percentage);
		EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent);
		exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
	}
#endif /* EXC_RESOURCE_MONITORS */

	if (fatal) {
#if CONFIG_JETSAM
		jetsam_on_ledger_cpulimit_exceeded();
#else
		task_terminate_internal(task);
#endif
	}
}
#if DEVELOPMENT || DEBUG
void __attribute__((noinline)) SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(task_t task, int thread_count)
{
	mach_exception_data_type_t code[EXCEPTION_CODE_MAX] = {0};
	int pid = task_pid(task);
	char procname[MAXCOMLEN + 1] = "unknown";

	if (pid == 1) {
		/*
		 * Cannot suspend launchd
		 */
		return;
	}

	proc_name(pid, procname, sizeof(procname));

	if (disable_exc_resource) {
		printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
		    "suppressed by a boot-arg.\n", procname, pid, thread_count);
		return;
	}

	if (audio_active) {
		printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
		    "suppressed due to audio playback.\n", procname, pid, thread_count);
		return;
	}

	if (exc_via_corpse_forking == 0) {
		printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
		    "suppressed due to corpse forking being disabled.\n", procname, pid,
		    thread_count);
		return;
	}

	printf("process %s[%d] crossed thread count high watermark (%d), sending "
	    "EXC_RESOURCE\n", procname, pid, thread_count);

	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_THREADS);
	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_THREADS_HIGH_WATERMARK);
	EXC_RESOURCE_THREADS_ENCODE_THREADS(code[0], thread_count);

	task_enqueue_exception_with_corpse(task, EXC_RESOURCE, code, EXCEPTION_CODE_MAX, NULL);
}
#endif /* DEVELOPMENT || DEBUG */
void thread_update_io_stats(thread_t thread, int size, int io_flags)
{
	int io_tier;

	if (thread->thread_io_stats == NULL || thread->task->task_io_stats == NULL)
		return;

	if (io_flags & DKIO_READ) {
		UPDATE_IO_STATS(thread->thread_io_stats->disk_reads, size);
		UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->disk_reads, size);
	}

	if (io_flags & DKIO_META) {
		UPDATE_IO_STATS(thread->thread_io_stats->metadata, size);
		UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->metadata, size);
	}

	if (io_flags & DKIO_PAGING) {
		UPDATE_IO_STATS(thread->thread_io_stats->paging, size);
		UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->paging, size);
	}

	io_tier = ((io_flags & DKIO_TIER_MASK) >> DKIO_TIER_SHIFT);
	assert(io_tier < IO_NUM_PRIORITIES);

	UPDATE_IO_STATS(thread->thread_io_stats->io_priority[io_tier], size);
	UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->io_priority[io_tier], size);

	/* Update Total I/O Counts */
	UPDATE_IO_STATS(thread->thread_io_stats->total_io, size);
	UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->total_io, size);

	if (!(io_flags & DKIO_READ)) {
		DTRACE_IO3(physical_writes, struct task *, thread->task, uint32_t, size, int, io_flags);
		ledger_credit(thread->task->ledger, task_ledgers.physical_writes, size);
	}
}
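/*
 * Illustrative note (not in the original source): io_flags combines the kind
 * bits (DKIO_READ / DKIO_META / DKIO_PAGING) with the throttle tier encoded
 * in the DKIO_TIER_MASK field. A metadata read issued at tier 2, for
 * instance, advances disk_reads, metadata, io_priority[2], and total_io,
 * each by `size` bytes, at both the thread and task level.
 */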
void
init_thread_ledgers(void) {
	ledger_template_t t;
	int idx;

	assert(thread_ledger_template == NULL);

	if ((t = ledger_template_create("Per-thread ledger")) == NULL)
		panic("couldn't create thread ledger template");

	if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
		panic("couldn't create cpu_time entry for thread ledger template");
	}

	if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) {
		panic("couldn't set thread ledger callback for cpu_time entry");
	}

	thread_ledgers.cpu_time = idx;

	ledger_template_complete(t);
	thread_ledger_template = t;
}
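/*
 * Illustrative note (not in the original source): this template is completed
 * once at boot; per-thread ledgers are instantiated from it lazily (see
 * thread_set_cpulimit() below, which calls ledger_instantiate() with
 * LEDGER_CREATE_INACTIVE_ENTRIES the first time a limit is applied).
 */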
/*
 * Returns currently applied CPU usage limit, or 0/0 if none is applied.
 */
int
thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns)
{
	int64_t		abstime = 0;
	uint64_t	limittime = 0;
	thread_t	thread = current_thread();

	*percentage  = 0;
	*interval_ns = 0;
	*action      = 0;

	if (thread->t_threadledger == LEDGER_NULL) {
		/*
		 * This thread has no per-thread ledger, so it can't possibly
		 * have a CPU limit applied.
		 */
		return (KERN_SUCCESS);
	}

	ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns);
	ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime);

	if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) {
		/*
		 * This thread's CPU time ledger has no period or limit; so it
		 * doesn't have a CPU limit applied.
		 */
		return (KERN_SUCCESS);
	}

	/*
	 * This calculation is the converse to the one in thread_set_cpulimit().
	 */
	absolutetime_to_nanoseconds(abstime, &limittime);
	*percentage = (limittime * 100ULL) / *interval_ns;
	assert(*percentage <= 100);

	if (thread->options & TH_OPT_PROC_CPULIMIT) {
		assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0);

		*action = THREAD_CPULIMIT_BLOCK;
	} else if (thread->options & TH_OPT_PRVT_CPULIMIT) {
		assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0);

		*action = THREAD_CPULIMIT_EXCEPTION;
	} else {
		*action = THREAD_CPULIMIT_DISABLE;
	}

	return (KERN_SUCCESS);
}
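/*
 * Worked example (illustrative, not in the original source): a ledger limit
 * of 250ms of CPU per 1s period reads back as limittime = 250,000,000 ns,
 * so *percentage = (250000000 * 100) / 1000000000 = 25.
 */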
/*
 * Set CPU usage limit on a thread.
 *
 * Calling with percentage of 0 will unset the limit for this thread.
 */
int
thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
{
	thread_t	thread = current_thread();
	ledger_t	l;
	uint64_t	limittime = 0;
	uint64_t	abstime = 0;

	assert(percentage <= 100);

	if (action == THREAD_CPULIMIT_DISABLE) {
		/*
		 * Remove CPU limit, if any exists.
		 */
		if (thread->t_threadledger != LEDGER_NULL) {
			l = thread->t_threadledger;
			ledger_set_limit(l, thread_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
			ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_IGNORE);
			thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT);
		}

		return (0);
	}

	if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) {
		return (KERN_INVALID_ARGUMENT);
	}

	l = thread->t_threadledger;
	if (l == LEDGER_NULL) {
		/*
		 * This thread doesn't yet have a per-thread ledger; so create one with the CPU time entry active.
		 */
		if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL)
			return (KERN_RESOURCE_SHORTAGE);

		/*
		 * We are the first to create this thread's ledger, so only activate our entry.
		 */
		ledger_entry_setactive(l, thread_ledgers.cpu_time);
		thread->t_threadledger = l;
	}

	/*
	 * The limit is specified as a percentage of CPU over an interval in nanoseconds.
	 * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
	 */
	limittime = (interval_ns * percentage) / 100;
	nanoseconds_to_absolutetime(limittime, &abstime);
	ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct);
	/*
	 * Refill the thread's allotted CPU time every interval_ns nanoseconds.
	 */
	ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);

	if (action == THREAD_CPULIMIT_EXCEPTION) {
		/*
		 * We don't support programming the CPU usage monitor on a task if any of its
		 * threads have a per-thread blocking CPU limit configured.
		 */
		if (thread->options & TH_OPT_PRVT_CPULIMIT) {
			panic("CPU usage monitor activated, but blocking thread limit exists");
		}

		/*
		 * Make a note that this thread's CPU limit is being used for the task-wide CPU
		 * usage monitor. We don't have to arm the callback which will trigger the
		 * exception, because that was done for us in ledger_instantiate (because the
		 * ledger template used has a default callback).
		 */
		thread->options |= TH_OPT_PROC_CPULIMIT;
	} else {
		/*
		 * We deliberately override any CPU limit imposed by a task-wide limit (eg
		 * CPU usage monitor).
		 */
		thread->options &= ~TH_OPT_PROC_CPULIMIT;

		thread->options |= TH_OPT_PRVT_CPULIMIT;
		/* The per-thread ledger template by default has a callback for CPU time */
		ledger_disable_callback(l, thread_ledgers.cpu_time);
		ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
	}

	return (0);
}
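/*
 * Illustrative usage (not in the original source): limiting the calling
 * thread to 50% of one CPU over a 1-second window, enforced by blocking:
 *
 *	thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, 50, 1 * NSEC_PER_SEC);
 *
 * which programs the ledger with limittime = (NSEC_PER_SEC * 50) / 100 =
 * 500,000,000 ns of CPU per refill period.
 */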
void
thread_sched_call(
	thread_t	thread,
	sched_call_t	call)
{
	assert((thread->state & TH_WAIT_REPORT) == 0);
	thread->sched_call = call;
}

uint64_t
thread_tid(
	thread_t	thread)
{
	return (thread != THREAD_NULL ? thread->thread_id : 0);
}

uint16_t
thread_set_tag(thread_t th, uint16_t tag)
{
	return thread_set_tag_internal(th, tag);
}

uint16_t
thread_get_tag(thread_t th)
{
	return thread_get_tag_internal(th);
}

uint64_t
thread_last_run_time(thread_t th)
{
	return th->last_run_time;
}
uint64_t
thread_dispatchqaddr(
	thread_t	thread)
{
	uint64_t	dispatchqueue_addr;
	uint64_t	thread_handle;

	if (thread == THREAD_NULL)
		return 0;

	thread_handle = thread->machine.cthread_self;
	if (thread_handle == 0)
		return 0;

	if (thread->inspection == TRUE)
		dispatchqueue_addr = thread_handle + get_task_dispatchqueue_offset(thread->task);
	else if (thread->task->bsd_info)
		dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
	else
		dispatchqueue_addr = 0;

	return dispatchqueue_addr;
}
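/*
 * Illustrative note (not in the original source): the returned address is
 * the thread's TSD base (machine.cthread_self) plus the per-process dispatch
 * queue offset, which lets inspection tooling read the thread's dispatch
 * queue pointer directly out of the target task's address space.
 */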
uint64_t
thread_rettokern_addr(
	thread_t	thread)
{
	uint64_t	rettokern_addr;
	uint64_t	rettokern_offset;
	uint64_t	thread_handle;

	if (thread == THREAD_NULL)
		return 0;

	thread_handle = thread->machine.cthread_self;
	if (thread_handle == 0)
		return 0;

	if (thread->task->bsd_info) {
		rettokern_offset = get_return_to_kernel_offset_from_proc(thread->task->bsd_info);

		/* Return 0 if return to kernel offset is not initialized. */
		if (rettokern_offset == 0) {
			return 0;
		}

		rettokern_addr = thread_handle + rettokern_offset;
	} else {
		return 0;
	}

	return rettokern_addr;
}
/*
 * Export routines to other components for things that are done as macros
 * within the osfmk component.
 */

#undef thread_mtx_lock
void thread_mtx_lock(thread_t thread);
void
thread_mtx_lock(thread_t thread)
{
	lck_mtx_lock(&thread->mutex);
}

#undef thread_mtx_unlock
void thread_mtx_unlock(thread_t thread);
void
thread_mtx_unlock(thread_t thread)
{
	lck_mtx_unlock(&thread->mutex);
}

#undef thread_reference
void thread_reference(thread_t thread);
void
thread_reference(
	thread_t	thread)
{
	if (thread != THREAD_NULL)
		thread_reference_internal(thread);
}

#undef thread_should_halt

boolean_t
thread_should_halt(
	thread_t	th)
{
	return (thread_should_halt_fast(th));
}
/*
 *	thread_set_voucher_name - reset the voucher port name bound to this thread
 *
 *	Conditions:  nothing locked
 *
 *	If we already converted the previous name to a cached voucher
 *	reference, then we discard that reference here.  The next lookup
 *	will cache it again.
 */
kern_return_t
thread_set_voucher_name(mach_port_name_t voucher_name)
{
	thread_t thread = current_thread();
	ipc_voucher_t new_voucher = IPC_VOUCHER_NULL;
	ipc_voucher_t voucher;
	ledger_t bankledger = NULL;
	struct thread_group *banktg = NULL;

	if (MACH_PORT_DEAD == voucher_name)
		return KERN_INVALID_RIGHT;

	/*
	 * aggressively convert to voucher reference
	 */
	if (MACH_PORT_VALID(voucher_name)) {
		new_voucher = convert_port_name_to_voucher(voucher_name);
		if (IPC_VOUCHER_NULL == new_voucher)
			return KERN_INVALID_ARGUMENT;
	}
	bank_get_bank_ledger_and_thread_group(new_voucher, &bankledger, &banktg);

	thread_mtx_lock(thread);
	voucher = thread->ith_voucher;
	thread->ith_voucher_name = voucher_name;
	thread->ith_voucher = new_voucher;
	thread_mtx_unlock(thread);

	bank_swap_thread_bank_ledger(thread, bankledger);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
		(uintptr_t)thread_tid(thread),
		(uintptr_t)voucher_name,
		VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
		1, 0);

	if (IPC_VOUCHER_NULL != voucher)
		ipc_voucher_release(voucher);

	return KERN_SUCCESS;
}
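/*
 * Illustrative note (not in the original source): passing MACH_PORT_NULL is
 * the unbind case -- it fails MACH_PORT_VALID(), so new_voucher remains
 * IPC_VOUCHER_NULL, the thread is left with no bound voucher, and any
 * previously cached reference is released above.
 */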
/*
 *	thread_get_mach_voucher - return a voucher reference for the specified thread voucher
 *
 *	Conditions:  nothing locked
 *
 *	A reference to the voucher may be lazily pending, if someone set the voucher name
 *	but nobody has done a lookup yet.  In that case, we'll have to do the equivalent
 *	lookup here.
 *
 *	NOTE: At the moment, there is no distinction between the current and effective
 *	vouchers because we only set them at the thread level currently.
 */
kern_return_t
thread_get_mach_voucher(
	thread_act_t		thread,
	mach_voucher_selector_t	__unused which,
	ipc_voucher_t		*voucherp)
{
	ipc_voucher_t		voucher;
	mach_port_name_t	voucher_name;

	if (THREAD_NULL == thread)
		return KERN_INVALID_ARGUMENT;

	thread_mtx_lock(thread);
	voucher = thread->ith_voucher;

	/* if already cached, just return a ref */
	if (IPC_VOUCHER_NULL != voucher) {
		ipc_voucher_reference(voucher);
		thread_mtx_unlock(thread);
		*voucherp = voucher;
		return KERN_SUCCESS;
	}

	voucher_name = thread->ith_voucher_name;

	/* convert the name to a port, then voucher reference */
	if (MACH_PORT_VALID(voucher_name)) {
		ipc_port_t port;

		if (KERN_SUCCESS !=
		    ipc_object_copyin(thread->task->itk_space, voucher_name,
		    MACH_MSG_TYPE_COPY_SEND, (ipc_object_t *)&port)) {
			thread->ith_voucher_name = MACH_PORT_NULL;
			thread_mtx_unlock(thread);
			*voucherp = IPC_VOUCHER_NULL;
			return KERN_SUCCESS;
		}

		/* convert to a voucher ref to return, and cache a ref on thread */
		voucher = convert_port_to_voucher(port);
		ipc_voucher_reference(voucher);
		thread->ith_voucher = voucher;
		thread_mtx_unlock(thread);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
			(uintptr_t)thread_tid(thread),
			(uintptr_t)voucher_name,
			VM_KERNEL_ADDRPERM((uintptr_t)voucher),
			2, 0);

		ipc_port_release_send(port);
	} else
		thread_mtx_unlock(thread);

	*voucherp = voucher;
	return KERN_SUCCESS;
}
/*
 *	thread_set_mach_voucher - set a voucher reference for the specified thread voucher
 *
 *	Conditions: caller holds a reference on the voucher.
 *	nothing locked.
 *
 *	We grab another reference to the voucher and bind it to the thread.  Any lazy
 *	binding is erased.  The old voucher reference associated with the thread is
 *	discarded.
 */
kern_return_t
thread_set_mach_voucher(
	thread_t		thread,
	ipc_voucher_t		voucher)
{
	ipc_voucher_t old_voucher;
	ledger_t bankledger = NULL;
	struct thread_group *banktg = NULL;

	if (THREAD_NULL == thread)
		return KERN_INVALID_ARGUMENT;

	if (thread != current_thread() && thread->started)
		return KERN_INVALID_ARGUMENT;

	ipc_voucher_reference(voucher);
	bank_get_bank_ledger_and_thread_group(voucher, &bankledger, &banktg);

	thread_mtx_lock(thread);
	old_voucher = thread->ith_voucher;
	thread->ith_voucher = voucher;
	thread->ith_voucher_name = MACH_PORT_NULL;
	thread_mtx_unlock(thread);

	bank_swap_thread_bank_ledger(thread, bankledger);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
		(uintptr_t)thread_tid(thread),
		(uintptr_t)MACH_PORT_NULL,
		VM_KERNEL_ADDRPERM((uintptr_t)voucher),
		3, 0);

	ipc_voucher_release(old_voucher);

	return KERN_SUCCESS;
}
/*
 *	thread_swap_mach_voucher - swap a voucher reference for the specified thread voucher
 *
 *	Conditions: caller holds a reference on the new and presumed old voucher(s).
 *	nothing locked.
 *
 *	This function is no longer supported.
 */
kern_return_t
thread_swap_mach_voucher(
	__unused thread_t	thread,
	__unused ipc_voucher_t	new_voucher,
	ipc_voucher_t		*in_out_old_voucher)
{
	/*
	 * Currently this function is only called from a MIG generated
	 * routine which doesn't release the reference on the voucher
	 * addressed by in_out_old_voucher. To avoid leaking this reference,
	 * a call to release it has been added here.
	 */
	ipc_voucher_release(*in_out_old_voucher);
	return KERN_NOT_SUPPORTED;
}
/*
 *	thread_get_current_voucher_origin_pid - get the pid of the originator of the current voucher.
 */
kern_return_t
thread_get_current_voucher_origin_pid(
	int32_t	*pid)
{
	uint32_t buf_size;
	kern_return_t kr;
	thread_t thread = current_thread();

	buf_size = sizeof(*pid);
	kr = mach_voucher_attr_command(thread->ith_voucher,
		MACH_VOUCHER_ATTR_KEY_BANK,
		BANK_ORIGINATOR_PID,
		NULL, 0,
		(mach_voucher_attr_content_t)pid,
		&buf_size);

	return kr;
}
boolean_t
thread_has_thread_name(thread_t th)
{
	if ((th) && (th->uthread)) {
		return bsd_hasthreadname(th->uthread);
	}

	/*
	 * This is an odd case; clients may set the thread name based on the lack of
	 * a name, but in this context there is no uthread to attach the name to.
	 */
	return FALSE;
}

void
thread_set_thread_name(thread_t th, const char* name)
{
	if ((th) && (th->uthread) && name) {
		bsd_setthreadname(th->uthread, name);
	}
}
void
thread_set_honor_qlimit(thread_t thread)
{
	thread->options |= TH_OPT_HONOR_QLIMIT;
}

void
thread_clear_honor_qlimit(thread_t thread)
{
	thread->options &= (~TH_OPT_HONOR_QLIMIT);
}

/*
 * thread_enable_send_importance - set/clear the SEND_IMPORTANCE thread option bit.
 */
void thread_enable_send_importance(thread_t thread, boolean_t enable)
{
	if (enable == TRUE)
		thread->options |= TH_OPT_SEND_IMPORTANCE;
	else
		thread->options &= ~TH_OPT_SEND_IMPORTANCE;
}
/*
 * thread_set_allocation_name - .
 */
kern_allocation_name_t
thread_set_allocation_name(kern_allocation_name_t new_name)
{
	kern_allocation_name_t ret;
	thread_kernel_state_t kstate = thread_get_kernel_state(current_thread());
	ret = kstate->allocation_name;
	if (!new_name || !kstate->allocation_name) kstate->allocation_name = new_name;
	return ret;
}
uint64_t
thread_get_last_wait_duration(thread_t thread)
{
	return thread->last_made_runnable_time - thread->last_run_time;
}
#if CONFIG_DTRACE
uint32_t dtrace_get_thread_predcache(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_predcache;
	else
		return 0;
}

int64_t dtrace_get_thread_vtime(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_vtime;
	else
		return 0;
}

int dtrace_get_thread_last_cpu_id(thread_t thread)
{
	if ((thread != THREAD_NULL) && (thread->last_processor != PROCESSOR_NULL)) {
		return thread->last_processor->cpu_id;
	} else {
		return -1;
	}
}

int64_t dtrace_get_thread_tracing(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_tracing;
	else
		return 0;
}

boolean_t
dtrace_get_thread_reentering(thread_t thread)
{
	if (thread != THREAD_NULL)
		return (thread->options & TH_OPT_DTRACE) ? TRUE : FALSE;
	else
		return FALSE;
}

vm_offset_t
dtrace_get_kernel_stack(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->kernel_stack;
	else
		return 0;
}
#if KASAN
struct kasan_thread_data *
kasan_get_thread_data(thread_t thread)
{
	return &thread->kasan_data;
}
#endif /* KASAN */
int64_t dtrace_calc_thread_recent_vtime(thread_t thread)
{
	if (thread != THREAD_NULL) {
		processor_t	processor = current_processor();
		uint64_t	abstime = mach_absolute_time();
		timer_t		timer;

		timer = PROCESSOR_DATA(processor, thread_timer);

		return timer_grab(&(thread->system_timer)) + timer_grab(&(thread->user_timer)) +
				(abstime - timer->tstamp); /* XXX need interrupts off to prevent missed time? */
	} else
		return 0;
}
void dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_predcache = predcache;
}

void dtrace_set_thread_vtime(thread_t thread, int64_t vtime)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_vtime = vtime;
}

void dtrace_set_thread_tracing(thread_t thread, int64_t accum)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_tracing = accum;
}

void dtrace_set_thread_reentering(thread_t thread, boolean_t vbool)
{
	if (thread != THREAD_NULL) {
		if (vbool)
			thread->options |= TH_OPT_DTRACE;
		else
			thread->options &= (~TH_OPT_DTRACE);
	}
}

vm_offset_t
dtrace_set_thread_recover(thread_t thread, vm_offset_t recover)
{
	vm_offset_t prev = 0;

	if (thread != THREAD_NULL) {
		prev = thread->recover;
		thread->recover = recover;
	}
	return prev;
}
void dtrace_thread_bootstrap(void)
{
	task_t task = current_task();

	if (task->thread_count == 1) {
		thread_t thread = current_thread();
		if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) {
			thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS;
			DTRACE_PROC(exec__success);
			KDBG(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXEC),
			    task_pid(task));
		}
		DTRACE_PROC(start);
	}
	DTRACE_PROC(lwp__start);
}

void
dtrace_thread_didexec(thread_t thread)
{
	thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS;
}
#endif /* CONFIG_DTRACE */