1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: kern/thread.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub
61 * Date: 1986
62 *
63 * Thread management primitives implementation.
64 */
65 /*
66 * Copyright (c) 1993 The University of Utah and
67 * the Computer Systems Laboratory (CSL). All rights reserved.
68 *
69 * Permission to use, copy, modify and distribute this software and its
70 * documentation is hereby granted, provided that both the copyright
71 * notice and this permission notice appear in all copies of the
72 * software, derivative works or modified versions, and any portions
73 * thereof, and that both notices appear in supporting documentation.
74 *
75 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
76 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
77 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
78 *
79 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
80 * improvements that they make and grant CSL redistribution rights.
81 *
82 */
83
84 #include <mach/mach_types.h>
85 #include <mach/boolean.h>
86 #include <mach/policy.h>
87 #include <mach/thread_info.h>
88 #include <mach/thread_special_ports.h>
89 #include <mach/thread_status.h>
90 #include <mach/time_value.h>
91 #include <mach/vm_param.h>
92
93 #include <machine/thread.h>
94 #include <machine/pal_routines.h>
95 #include <machine/limits.h>
96
97 #include <kern/kern_types.h>
98 #include <kern/kalloc.h>
99 #include <kern/cpu_data.h>
100 #include <kern/counters.h>
101 #include <kern/extmod_statistics.h>
102 #include <kern/ipc_mig.h>
103 #include <kern/ipc_tt.h>
104 #include <kern/mach_param.h>
105 #include <kern/machine.h>
106 #include <kern/misc_protos.h>
107 #include <kern/processor.h>
108 #include <kern/queue.h>
109 #include <kern/sched.h>
110 #include <kern/sched_prim.h>
111 #include <kern/sync_lock.h>
112 #include <kern/syscall_subr.h>
113 #include <kern/task.h>
114 #include <kern/thread.h>
115 #include <kern/thread_group.h>
116 #include <kern/coalition.h>
117 #include <kern/host.h>
118 #include <kern/zalloc.h>
119 #include <kern/assert.h>
120 #include <kern/exc_resource.h>
121 #include <kern/exc_guard.h>
122 #include <kern/telemetry.h>
123 #include <kern/policy_internal.h>
124 #include <kern/turnstile.h>
125
126 #include <corpses/task_corpse.h>
127 #if KPC
128 #include <kern/kpc.h>
129 #endif
130
131 #if MONOTONIC
132 #include <kern/monotonic.h>
133 #include <machine/monotonic.h>
134 #endif /* MONOTONIC */
135
136 #include <ipc/ipc_kmsg.h>
137 #include <ipc/ipc_port.h>
138 #include <bank/bank_types.h>
139
140 #include <vm/vm_kern.h>
141 #include <vm/vm_pageout.h>
142
143 #include <sys/kdebug.h>
144 #include <sys/bsdtask_info.h>
145 #include <mach/sdt.h>
146 #include <san/kasan.h>
147
148 #include <stdatomic.h>
149
150 /*
151 * Exported interfaces
152 */
153 #include <mach/task_server.h>
154 #include <mach/thread_act_server.h>
155 #include <mach/mach_host_server.h>
156 #include <mach/host_priv_server.h>
157 #include <mach/mach_voucher_server.h>
158 #include <kern/policy_internal.h>
159
160 static struct zone *thread_zone;
161 static lck_grp_attr_t thread_lck_grp_attr;
162 lck_attr_t thread_lck_attr;
163 lck_grp_t thread_lck_grp;
164
165 struct zone *thread_qos_override_zone;
166
167 decl_simple_lock_data(static,thread_stack_lock)
168 static queue_head_t thread_stack_queue;
169
170 decl_simple_lock_data(static,thread_terminate_lock)
171 static queue_head_t thread_terminate_queue;
172
173 static queue_head_t thread_deallocate_queue;
174
175 static queue_head_t turnstile_deallocate_queue;
176
177 static queue_head_t crashed_threads_queue;
178
179 static queue_head_t workq_deallocate_queue;
180
181 decl_simple_lock_data(static,thread_exception_lock)
182 static queue_head_t thread_exception_queue;
183
184 struct thread_exception_elt {
185 queue_chain_t elt;
186 exception_type_t exception_type;
187 task_t exception_task;
188 thread_t exception_thread;
189 };
190
191 static struct thread thread_template, init_thread;
192 static void thread_deallocate_enqueue(thread_t thread);
193 static void thread_deallocate_complete(thread_t thread);
194
195 #ifdef MACH_BSD
196 extern void proc_exit(void *);
197 extern mach_exception_data_type_t proc_encode_exit_exception_code(void *);
198 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
199 extern uint64_t get_return_to_kernel_offset_from_proc(void *p);
200 extern int proc_selfpid(void);
201 extern void proc_name(int, char*, int);
202 extern char * proc_name_address(void *p);
203 #endif /* MACH_BSD */
204
205 extern int disable_exc_resource;
206 extern int audio_active;
207 extern int debug_task;
208 int thread_max = CONFIG_THREAD_MAX; /* Max number of threads */
209 int task_threadmax = CONFIG_THREAD_MAX;
210
211 static uint64_t thread_unique_id = 100;
212
213 struct _thread_ledger_indices thread_ledgers = { -1 };
214 static ledger_template_t thread_ledger_template = NULL;
215 static void init_thread_ledgers(void);
216
217 #if CONFIG_JETSAM
218 void jetsam_on_ledger_cpulimit_exceeded(void);
219 #endif
220
221 extern int task_thread_soft_limit;
222 extern int exc_via_corpse_forking;
223
224 #if DEVELOPMENT || DEBUG
225 extern int exc_resource_threads_enabled;
226 #endif /* DEVELOPMENT || DEBUG */
227
228 /*
229 * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry.
230 *
231  * (i.e., when any thread's CPU consumption exceeds 70% of the limit, start taking user
232  * stack traces, a.k.a. micro-stackshots)
233 */
234 #define CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70
235
236 int cpumon_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
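/*
 * Worked example (illustrative only): with a per-thread CPU limit of 50%
 * over a 1-second interval and the default trigger of 70%, micro-stackshot
 * telemetry begins once the thread has consumed 0.70 * 0.50 * 1s = 350ms of
 * CPU time within that interval. The trigger percentage can be overridden
 * with the "cpumon_ustackshots_trigger_pct" boot-arg parsed in thread_init()
 * below.
 */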
237 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void);
238 #if DEVELOPMENT || DEBUG
239 void __attribute__((noinline)) SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(task_t, int);
240 #endif /* DEVELOPMENT || DEBUG */
241
242 /*
243 * The smallest interval over which we support limiting CPU consumption is 1ms
244 */
245 #define MINIMUM_CPULIMIT_INTERVAL_MS 1
246
247 os_refgrp_decl(static, thread_refgrp, "thread", NULL);
248
249 void
250 thread_bootstrap(void)
251 {
252 /*
253 * Fill in a template thread for fast initialization.
254 */
255
256 #if MACH_ASSERT
257 thread_template.thread_magic = THREAD_MAGIC;
258 #endif /* MACH_ASSERT */
259
260 thread_template.runq = PROCESSOR_NULL;
261
262 thread_template.reason = AST_NONE;
263 thread_template.at_safe_point = FALSE;
264 thread_template.wait_event = NO_EVENT64;
265 thread_template.waitq = NULL;
266 thread_template.wait_result = THREAD_WAITING;
267 thread_template.options = THREAD_ABORTSAFE;
268 thread_template.state = TH_WAIT | TH_UNINT;
269 thread_template.wake_active = FALSE;
270 thread_template.continuation = THREAD_CONTINUE_NULL;
271 thread_template.parameter = NULL;
272
273 thread_template.importance = 0;
274 thread_template.sched_mode = TH_MODE_NONE;
275 thread_template.sched_flags = 0;
276 thread_template.saved_mode = TH_MODE_NONE;
277 thread_template.safe_release = 0;
278 thread_template.th_sched_bucket = TH_BUCKET_RUN;
279
280 thread_template.sfi_class = SFI_CLASS_UNSPECIFIED;
281 thread_template.sfi_wait_class = SFI_CLASS_UNSPECIFIED;
282
283 thread_template.active = 0;
284 thread_template.started = 0;
285 thread_template.static_param = 0;
286 thread_template.policy_reset = 0;
287
288 thread_template.base_pri = BASEPRI_DEFAULT;
289 thread_template.sched_pri = 0;
290 thread_template.max_priority = 0;
291 thread_template.task_priority = 0;
292 thread_template.promotions = 0;
293 thread_template.rwlock_count = 0;
294 thread_template.waiting_for_mutex = NULL;
295
296
297 thread_template.realtime.deadline = UINT64_MAX;
298
299 thread_template.quantum_remaining = 0;
300 thread_template.last_run_time = 0;
301 thread_template.last_made_runnable_time = THREAD_NOT_RUNNABLE;
302 thread_template.last_basepri_change_time = THREAD_NOT_RUNNABLE;
303 thread_template.same_pri_latency = 0;
304
305 thread_template.computation_metered = 0;
306 thread_template.computation_epoch = 0;
307
308 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
309 thread_template.sched_stamp = 0;
310 thread_template.pri_shift = INT8_MAX;
311 thread_template.sched_usage = 0;
312 thread_template.cpu_usage = thread_template.cpu_delta = 0;
313 #endif
314 thread_template.c_switch = thread_template.p_switch = thread_template.ps_switch = 0;
315
316 #if MONOTONIC
317 memset(&thread_template.t_monotonic, 0,
318 sizeof(thread_template.t_monotonic));
319 #endif /* MONOTONIC */
320
321 thread_template.bound_processor = PROCESSOR_NULL;
322 thread_template.last_processor = PROCESSOR_NULL;
323
324 thread_template.sched_call = NULL;
325
326 timer_init(&thread_template.user_timer);
327 timer_init(&thread_template.system_timer);
328 timer_init(&thread_template.ptime);
329 timer_init(&thread_template.runnable_timer);
330 thread_template.user_timer_save = 0;
331 thread_template.system_timer_save = 0;
332 thread_template.vtimer_user_save = 0;
333 thread_template.vtimer_prof_save = 0;
334 thread_template.vtimer_rlim_save = 0;
335 thread_template.vtimer_qos_save = 0;
336
337 #if CONFIG_SCHED_SFI
338 thread_template.wait_sfi_begin_time = 0;
339 #endif
340
341 thread_template.wait_timer_is_set = FALSE;
342 thread_template.wait_timer_active = 0;
343
344 thread_template.depress_timer_active = 0;
345
346 thread_template.recover = (vm_offset_t)NULL;
347
348 thread_template.map = VM_MAP_NULL;
349 #if DEVELOPMENT || DEBUG
350 thread_template.pmap_footprint_suspended = FALSE;
351 #endif /* DEVELOPMENT || DEBUG */
352
353 #if CONFIG_DTRACE
354 thread_template.t_dtrace_predcache = 0;
355 thread_template.t_dtrace_vtime = 0;
356 thread_template.t_dtrace_tracing = 0;
357 #endif /* CONFIG_DTRACE */
358
359 #if KPERF
360 thread_template.kperf_flags = 0;
361 thread_template.kperf_pet_gen = 0;
362 thread_template.kperf_c_switch = 0;
363 thread_template.kperf_pet_cnt = 0;
364 #endif
365
366 #if KPC
367 thread_template.kpc_buf = NULL;
368 #endif
369
370 #if HYPERVISOR
371 thread_template.hv_thread_target = NULL;
372 #endif /* HYPERVISOR */
373
374 #if (DEVELOPMENT || DEBUG)
375 thread_template.t_page_creation_throttled_hard = 0;
376 thread_template.t_page_creation_throttled_soft = 0;
377 #endif /* DEVELOPMENT || DEBUG */
378 thread_template.t_page_creation_throttled = 0;
379 thread_template.t_page_creation_count = 0;
380 thread_template.t_page_creation_time = 0;
381
382 thread_template.affinity_set = NULL;
383
384 thread_template.syscalls_unix = 0;
385 thread_template.syscalls_mach = 0;
386
387 thread_template.t_ledger = LEDGER_NULL;
388 thread_template.t_threadledger = LEDGER_NULL;
389 thread_template.t_bankledger = LEDGER_NULL;
390 thread_template.t_deduct_bank_ledger_time = 0;
391
392 thread_template.requested_policy = (struct thread_requested_policy) {};
393 thread_template.effective_policy = (struct thread_effective_policy) {};
394
395 bzero(&thread_template.overrides, sizeof(thread_template.overrides));
396 thread_template.sync_ipc_overrides = 0;
397
398 thread_template.iotier_override = THROTTLE_LEVEL_NONE;
399 thread_template.thread_io_stats = NULL;
400 #if CONFIG_EMBEDDED
401 thread_template.taskwatch = NULL;
402 #endif /* CONFIG_EMBEDDED */
403 thread_template.thread_callout_interrupt_wakeups = thread_template.thread_callout_platform_idle_wakeups = 0;
404
405 thread_template.thread_timer_wakeups_bin_1 = thread_template.thread_timer_wakeups_bin_2 = 0;
406 thread_template.callout_woken_from_icontext = thread_template.callout_woken_from_platform_idle = 0;
407
408 thread_template.thread_tag = 0;
409
410 thread_template.ith_voucher_name = MACH_PORT_NULL;
411 thread_template.ith_voucher = IPC_VOUCHER_NULL;
412
413 thread_template.th_work_interval = NULL;
414
415 init_thread = thread_template;
416
417 machine_set_current_thread(&init_thread);
418 }
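/*
 * Note on the template above: thread_create_internal() initializes each new
 * thread by struct-copying thread_template ("*new_thread = thread_template;"),
 * so any field added to struct thread should be given a sane default in
 * thread_bootstrap(). init_thread is a copy of the same template, installed
 * via machine_set_current_thread() to stand in as the bootstrap processor's
 * current thread until the real one is set up.
 */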
419
420 extern boolean_t allow_qos_policy_set;
421
422 void
423 thread_init(void)
424 {
425 thread_zone = zinit(
426 sizeof(struct thread),
427 thread_max * sizeof(struct thread),
428 THREAD_CHUNK * sizeof(struct thread),
429 "threads");
430
431 thread_qos_override_zone = zinit(
432 sizeof(struct thread_qos_override),
433 4 * thread_max * sizeof(struct thread_qos_override),
434 PAGE_SIZE,
435 "thread qos override");
436 zone_change(thread_qos_override_zone, Z_EXPAND, TRUE);
437 zone_change(thread_qos_override_zone, Z_COLLECT, TRUE);
438 zone_change(thread_qos_override_zone, Z_CALLERACCT, FALSE);
439 zone_change(thread_qos_override_zone, Z_NOENCRYPT, TRUE);
440
441 lck_grp_attr_setdefault(&thread_lck_grp_attr);
442 lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr);
443 lck_attr_setdefault(&thread_lck_attr);
444
445 stack_init();
446
447 thread_policy_init();
448
449 /*
450 * Initialize any machine-dependent
451 * per-thread structures necessary.
452 */
453 machine_thread_init();
454
455 if (!PE_parse_boot_argn("cpumon_ustackshots_trigger_pct", &cpumon_ustackshots_trigger_pct,
456 sizeof (cpumon_ustackshots_trigger_pct))) {
457 cpumon_ustackshots_trigger_pct = CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT;
458 }
459
460 PE_parse_boot_argn("-qos-policy-allow", &allow_qos_policy_set, sizeof(allow_qos_policy_set));
461
462 init_thread_ledgers();
463 }
464
465 boolean_t
466 thread_is_active(thread_t thread)
467 {
468 return (thread->active);
469 }
470
471 void
472 thread_corpse_continue(void)
473 {
474 thread_t thread = current_thread();
475
476 thread_terminate_internal(thread);
477
478 /*
479 * Handle the thread termination directly
480 * here instead of returning to userspace.
481 */
482 assert(thread->active == FALSE);
483 thread_ast_clear(thread, AST_APC);
484 thread_apc_ast(thread);
485
486 panic("thread_corpse_continue");
487 /*NOTREACHED*/
488 }
489
490 static void
491 thread_terminate_continue(void)
492 {
493 panic("thread_terminate_continue");
494 /*NOTREACHED*/
495 }
496
497 /*
498 * thread_terminate_self:
499 */
500 void
501 thread_terminate_self(void)
502 {
503 thread_t thread = current_thread();
504 task_t task;
505 int threadcnt;
506
507 pal_thread_terminate_self(thread);
508
509 DTRACE_PROC(lwp__exit);
510
511 thread_mtx_lock(thread);
512
513 ipc_thread_disable(thread);
514
515 thread_mtx_unlock(thread);
516
517 thread_sched_call(thread, NULL);
518
519 spl_t s = splsched();
520 thread_lock(thread);
521
522 thread_depress_abort_locked(thread);
523
524 thread_unlock(thread);
525 splx(s);
526
527 #if CONFIG_EMBEDDED
528 thead_remove_taskwatch(thread);
529 #endif /* CONFIG_EMBEDDED */
530
531 work_interval_thread_terminate(thread);
532
533 thread_mtx_lock(thread);
534
535 thread_policy_reset(thread);
536
537 thread_mtx_unlock(thread);
538
539 bank_swap_thread_bank_ledger(thread, NULL);
540
541 if (kdebug_enable && bsd_hasthreadname(thread->uthread)) {
542 char threadname[MAXTHREADNAMESIZE];
543 bsd_getthreadname(thread->uthread, threadname);
544 kernel_debug_string_simple(TRACE_STRING_THREADNAME_PREV, threadname);
545 }
546
547 task = thread->task;
548 uthread_cleanup(task, thread->uthread, task->bsd_info);
549
550 if (kdebug_enable && task->bsd_info && !task_is_exec_copy(task)) {
551 /* trace out pid before we sign off */
552 long dbg_arg1 = 0;
553 long dbg_arg2 = 0;
554
555 kdbg_trace_data(thread->task->bsd_info, &dbg_arg1, &dbg_arg2);
556 KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE_PID, dbg_arg1, dbg_arg2);
557 }
558
559 /*
560 * After this subtraction, this thread should never access
561 * task->bsd_info unless it got 0 back from the hw_atomic_sub. It
562 * could be racing with other threads to be the last thread in the
563 * process, and the last thread in the process will tear down the proc
564 * structure and zero-out task->bsd_info.
565 */
566 threadcnt = hw_atomic_sub(&task->active_thread_count, 1);
567
568 /*
569 * If we are the last thread to terminate and the task is
570 * associated with a BSD process, perform BSD process exit.
571 */
572 if (threadcnt == 0 && task->bsd_info != NULL && !task_is_exec_copy(task)) {
573 mach_exception_data_type_t subcode = 0;
574 if (kdebug_enable) {
575 /* since we're the last thread in this process, trace out the command name too */
576 long args[4] = {};
577 kdbg_trace_string(thread->task->bsd_info, &args[0], &args[1], &args[2], &args[3]);
578 KDBG_RELEASE(TRACE_STRING_PROC_EXIT, args[0], args[1], args[2], args[3]);
579 }
580
581 /* Get the exit reason before proc_exit */
582 subcode = proc_encode_exit_exception_code(task->bsd_info);
583 proc_exit(task->bsd_info);
584 /*
585                  * If there is crash info in the task,
586                  * deliver the crash notification, since this is
587                  * the last thread for this task.
588 */
589 if (task->corpse_info) {
590 task_deliver_crash_notification(task, current_thread(), EXC_RESOURCE, subcode);
591 }
592 }
593
594 if (threadcnt == 0) {
595 task_lock(task);
596 if (task_is_a_corpse_fork(task)) {
597 thread_wakeup((event_t)&task->active_thread_count);
598 }
599 task_unlock(task);
600 }
601
602 uthread_cred_free(thread->uthread);
603
604 s = splsched();
605 thread_lock(thread);
606
607 /*
608 * Ensure that the depress timer is no longer enqueued,
609 * so the timer (stored in the thread) can be safely deallocated
610 *
611 * TODO: build timer_call_cancel_wait
612 */
613
614 assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) == 0);
615
616 uint32_t delay_us = 1;
617
618 while (thread->depress_timer_active > 0) {
619 thread_unlock(thread);
620 splx(s);
621
622 delay(delay_us++);
623
624 if (delay_us > USEC_PER_SEC)
625 panic("depress timer failed to inactivate!"
626 "thread: %p depress_timer_active: %d",
627 thread, thread->depress_timer_active);
628
629 s = splsched();
630 thread_lock(thread);
631 }
632
633 /*
634 * Cancel wait timer, and wait for
635 * concurrent expirations.
636 */
637 if (thread->wait_timer_is_set) {
638 thread->wait_timer_is_set = FALSE;
639
640 if (timer_call_cancel(&thread->wait_timer))
641 thread->wait_timer_active--;
642 }
643
644 delay_us = 1;
645
646 while (thread->wait_timer_active > 0) {
647 thread_unlock(thread);
648 splx(s);
649
650 delay(delay_us++);
651
652 if (delay_us > USEC_PER_SEC)
653 panic("wait timer failed to inactivate!"
654 "thread: %p wait_timer_active: %d",
655 thread, thread->wait_timer_active);
656
657 s = splsched();
658 thread_lock(thread);
659 }
660
661 /*
662 * If there is a reserved stack, release it.
663 */
664 if (thread->reserved_stack != 0) {
665 stack_free_reserved(thread);
666 thread->reserved_stack = 0;
667 }
668
669 /*
670 * Mark thread as terminating, and block.
671 */
672 thread->state |= TH_TERMINATE;
673 thread_mark_wait_locked(thread, THREAD_UNINT);
674
675 assert((thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED) == 0);
676 assert((thread->sched_flags & TH_SFLAG_RW_PROMOTED) == 0);
677 assert((thread->sched_flags & TH_SFLAG_EXEC_PROMOTED) == 0);
678 assert((thread->sched_flags & TH_SFLAG_PROMOTED) == 0);
679 assert(thread->promotions == 0);
680 assert(thread->was_promoted_on_wakeup == 0);
681 assert(thread->waiting_for_mutex == NULL);
682 assert(thread->rwlock_count == 0);
683
684 thread_unlock(thread);
685 /* splsched */
686
687 thread_block((thread_continue_t)thread_terminate_continue);
688 /*NOTREACHED*/
689 }
690
691 static bool
692 thread_ref_release(thread_t thread)
693 {
694 if (thread == THREAD_NULL) {
695 return false;
696 }
697
698 assert_thread_magic(thread);
699
700 return os_ref_release(&thread->ref_count) == 0;
701 }
702
703 /* Drop a thread refcount safely without triggering a zfree */
704 void
705 thread_deallocate_safe(thread_t thread)
706 {
707 if (__improbable(thread_ref_release(thread))) {
708                 /* enqueue the thread for the thread deallocate daemon to call thread_deallocate_complete */
709 thread_deallocate_enqueue(thread);
710 }
711 }
712
713 void
714 thread_deallocate(thread_t thread)
715 {
716 if (__improbable(thread_ref_release(thread))) {
717 thread_deallocate_complete(thread);
718 }
719 }
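/*
 * Usage note (a sketch of the intended split, based on the code above):
 * contexts that must not call into zalloc/zfree or block -- e.g. code running
 * at splsched or while holding simple locks -- drop their reference with
 * thread_deallocate_safe(), which defers the final teardown to the terminate
 * daemon via thread_deallocate_enqueue(); ordinary contexts call
 * thread_deallocate(), which may perform thread_deallocate_complete() inline
 * when the last reference goes away.
 */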
720
721 void
722 thread_deallocate_complete(
723 thread_t thread)
724 {
725 task_t task;
726
727 assert_thread_magic(thread);
728
729 assert(os_ref_get_count(&thread->ref_count) == 0);
730
731 assert(thread_owned_workloops_count(thread) == 0);
732
733 if (!(thread->state & TH_TERMINATE2))
734 panic("thread_deallocate: thread not properly terminated\n");
735
736 assert(thread->runq == PROCESSOR_NULL);
737
738 #if KPC
739 kpc_thread_destroy(thread);
740 #endif
741
742 ipc_thread_terminate(thread);
743
744 proc_thread_qos_deallocate(thread);
745
746 task = thread->task;
747
748 #ifdef MACH_BSD
749 {
750 void *ut = thread->uthread;
751
752 thread->uthread = NULL;
753 uthread_zone_free(ut);
754 }
755 #endif /* MACH_BSD */
756
757 if (thread->t_ledger)
758 ledger_dereference(thread->t_ledger);
759 if (thread->t_threadledger)
760 ledger_dereference(thread->t_threadledger);
761
762 assert(thread->turnstile != TURNSTILE_NULL);
763 if (thread->turnstile)
764 turnstile_deallocate(thread->turnstile);
765
766 if (IPC_VOUCHER_NULL != thread->ith_voucher)
767 ipc_voucher_release(thread->ith_voucher);
768
769 if (thread->thread_io_stats)
770 kfree(thread->thread_io_stats, sizeof(struct io_stat_info));
771
772 if (thread->kernel_stack != 0)
773 stack_free(thread);
774
775 lck_mtx_destroy(&thread->mutex, &thread_lck_grp);
776 machine_thread_destroy(thread);
777
778 task_deallocate(task);
779
780 #if MACH_ASSERT
781 assert_thread_magic(thread);
782 thread->thread_magic = 0;
783 #endif /* MACH_ASSERT */
784
785 zfree(thread_zone, thread);
786 }
787
788 void
789 thread_starts_owning_workloop(thread_t thread)
790 {
791 atomic_fetch_add_explicit(&thread->kqwl_owning_count, 1,
792 memory_order_relaxed);
793 }
794
795 void
796 thread_ends_owning_workloop(thread_t thread)
797 {
798 __assert_only uint32_t count;
799 count = atomic_fetch_sub_explicit(&thread->kqwl_owning_count, 1,
800 memory_order_relaxed);
801 assert(count > 0);
802 }
803
804 uint32_t
805 thread_owned_workloops_count(thread_t thread)
806 {
807 return atomic_load_explicit(&thread->kqwl_owning_count,
808 memory_order_relaxed);
809 }
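/*
 * Note: thread_deallocate_complete() asserts that this count is zero, so
 * every thread_starts_owning_workloop() must be balanced by a matching
 * thread_ends_owning_workloop() before the thread's last reference is
 * dropped.
 */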
810
811 /*
812 * thread_inspect_deallocate:
813 *
814 * Drop a thread inspection reference.
815 */
816 void
817 thread_inspect_deallocate(
818 thread_inspect_t thread_inspect)
819 {
820 return(thread_deallocate((thread_t)thread_inspect));
821 }
822
823 /*
824 * thread_exception_daemon:
825 *
826 * Deliver EXC_{RESOURCE,GUARD} exception
827 */
828 static void
829 thread_exception_daemon(void)
830 {
831 struct thread_exception_elt *elt;
832 task_t task;
833 thread_t thread;
834 exception_type_t etype;
835
836 simple_lock(&thread_exception_lock);
837 while ((elt = (struct thread_exception_elt *)dequeue_head(&thread_exception_queue)) != NULL) {
838 simple_unlock(&thread_exception_lock);
839
840 etype = elt->exception_type;
841 task = elt->exception_task;
842 thread = elt->exception_thread;
843 assert_thread_magic(thread);
844
845 kfree(elt, sizeof (*elt));
846
847 /* wait for all the threads in the task to terminate */
848 task_lock(task);
849 task_wait_till_threads_terminate_locked(task);
850 task_unlock(task);
851
852 /* Consumes the task ref returned by task_generate_corpse_internal */
853 task_deallocate(task);
854 /* Consumes the thread ref returned by task_generate_corpse_internal */
855 thread_deallocate(thread);
856
857 /* Deliver the notification, also clears the corpse. */
858 task_deliver_crash_notification(task, thread, etype, 0);
859
860 simple_lock(&thread_exception_lock);
861 }
862
863 assert_wait((event_t)&thread_exception_queue, THREAD_UNINT);
864 simple_unlock(&thread_exception_lock);
865
866 thread_block((thread_continue_t)thread_exception_daemon);
867 }
868
869 /*
870 * thread_exception_enqueue:
871 *
872 * Enqueue a corpse port to be delivered an EXC_{RESOURCE,GUARD}.
873 */
874 void
875 thread_exception_enqueue(
876 task_t task,
877 thread_t thread,
878 exception_type_t etype)
879 {
880 assert(EXC_RESOURCE == etype || EXC_GUARD == etype);
881 struct thread_exception_elt *elt = kalloc(sizeof (*elt));
882 elt->exception_type = etype;
883 elt->exception_task = task;
884 elt->exception_thread = thread;
885
886 simple_lock(&thread_exception_lock);
887 enqueue_tail(&thread_exception_queue, (queue_entry_t)elt);
888 simple_unlock(&thread_exception_lock);
889
890 thread_wakeup((event_t)&thread_exception_queue);
891 }
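/*
 * Note: the task and thread references passed to thread_exception_enqueue()
 * are the ones returned by task_generate_corpse_internal(); the exception
 * daemon above consumes them after waiting for the corpse task's threads to
 * terminate and before delivering the EXC_{RESOURCE,GUARD} notification.
 */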
892
893 /*
894 * thread_copy_resource_info
895 *
896 * Copy the resource info counters from source
897 * thread to destination thread.
898 */
899 void
900 thread_copy_resource_info(
901 thread_t dst_thread,
902 thread_t src_thread)
903 {
904 dst_thread->c_switch = src_thread->c_switch;
905 dst_thread->p_switch = src_thread->p_switch;
906 dst_thread->ps_switch = src_thread->ps_switch;
907 dst_thread->precise_user_kernel_time = src_thread->precise_user_kernel_time;
908 dst_thread->user_timer = src_thread->user_timer;
909 dst_thread->user_timer_save = src_thread->user_timer_save;
910 dst_thread->system_timer = src_thread->system_timer;
911 dst_thread->system_timer_save = src_thread->system_timer_save;
912 dst_thread->runnable_timer = src_thread->runnable_timer;
913 dst_thread->vtimer_user_save = src_thread->vtimer_user_save;
914 dst_thread->vtimer_prof_save = src_thread->vtimer_prof_save;
915 dst_thread->vtimer_rlim_save = src_thread->vtimer_rlim_save;
916 dst_thread->vtimer_qos_save = src_thread->vtimer_qos_save;
917 dst_thread->syscalls_unix = src_thread->syscalls_unix;
918 dst_thread->syscalls_mach = src_thread->syscalls_mach;
919 ledger_rollup(dst_thread->t_threadledger, src_thread->t_threadledger);
920 *dst_thread->thread_io_stats = *src_thread->thread_io_stats;
921 }
922
923 /*
924 * thread_terminate_daemon:
925 *
926 * Perform final clean up for terminating threads.
927 */
928 static void
929 thread_terminate_daemon(void)
930 {
931 thread_t self, thread;
932 task_t task;
933
934 self = current_thread();
935 self->options |= TH_OPT_SYSTEM_CRITICAL;
936
937 (void)splsched();
938 simple_lock(&thread_terminate_lock);
939
940 thread_terminate_start:
941 while ((thread = qe_dequeue_head(&thread_terminate_queue, struct thread, runq_links)) != THREAD_NULL) {
942 assert_thread_magic(thread);
943
944 /*
945                  * If marked for crash reporting, skip reaping.
946                  * The corpse delivery thread will clear the bit and enqueue
947                  * the thread for reaping when done.
948 */
949 if (thread->inspection){
950 enqueue_tail(&crashed_threads_queue, &thread->runq_links);
951 continue;
952 }
953
954 simple_unlock(&thread_terminate_lock);
955 (void)spllo();
956
957 task = thread->task;
958
959 task_lock(task);
960 task->total_user_time += timer_grab(&thread->user_timer);
961 task->total_ptime += timer_grab(&thread->ptime);
962 task->total_runnable_time += timer_grab(&thread->runnable_timer);
963 if (thread->precise_user_kernel_time) {
964 task->total_system_time += timer_grab(&thread->system_timer);
965 } else {
966 task->total_user_time += timer_grab(&thread->system_timer);
967 }
968
969 task->c_switch += thread->c_switch;
970 task->p_switch += thread->p_switch;
971 task->ps_switch += thread->ps_switch;
972
973 task->syscalls_unix += thread->syscalls_unix;
974 task->syscalls_mach += thread->syscalls_mach;
975
976 task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
977 task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
978 task->task_gpu_ns += ml_gpu_stat(thread);
979 task->task_energy += ml_energy_stat(thread);
980
981 #if MONOTONIC
982 mt_terminate_update(task, thread);
983 #endif /* MONOTONIC */
984
985 thread_update_qos_cpu_time(thread);
986
987 queue_remove(&task->threads, thread, thread_t, task_threads);
988 task->thread_count--;
989
990 /*
991 * If the task is being halted, and there is only one thread
992 * left in the task after this one, then wakeup that thread.
993 */
994 if (task->thread_count == 1 && task->halting)
995 thread_wakeup((event_t)&task->halting);
996
997 task_unlock(task);
998
999 lck_mtx_lock(&tasks_threads_lock);
1000 queue_remove(&threads, thread, thread_t, threads);
1001 threads_count--;
1002 lck_mtx_unlock(&tasks_threads_lock);
1003
1004 thread_deallocate(thread);
1005
1006 (void)splsched();
1007 simple_lock(&thread_terminate_lock);
1008 }
1009
1010 while ((thread = qe_dequeue_head(&thread_deallocate_queue, struct thread, runq_links)) != THREAD_NULL) {
1011 assert_thread_magic(thread);
1012
1013 simple_unlock(&thread_terminate_lock);
1014 (void)spllo();
1015
1016 thread_deallocate_complete(thread);
1017
1018 (void)splsched();
1019 simple_lock(&thread_terminate_lock);
1020 }
1021
1022 struct turnstile *turnstile;
1023 while ((turnstile = qe_dequeue_head(&turnstile_deallocate_queue, struct turnstile, ts_deallocate_link)) != TURNSTILE_NULL) {
1024
1025 simple_unlock(&thread_terminate_lock);
1026 (void)spllo();
1027
1028 turnstile_destroy(turnstile);
1029
1030 (void)splsched();
1031 simple_lock(&thread_terminate_lock);
1032 }
1033
1034 queue_entry_t qe;
1035
1036 /*
1037 * see workq_deallocate_enqueue: struct workqueue is opaque to thread.c and
1038 * we just link pieces of memory here
1039 */
1040 while ((qe = dequeue_head(&workq_deallocate_queue))) {
1041 simple_unlock(&thread_terminate_lock);
1042 (void)spllo();
1043
1044 workq_destroy((struct workqueue *)qe);
1045
1046 (void)splsched();
1047 simple_lock(&thread_terminate_lock);
1048 }
1049
1050 /*
1051          * Check whether anything was enqueued on the thread terminate/deallocate
1052          * queues while we were processing the workq deallocate queue.
1053 */
1054 if (!queue_empty(&thread_terminate_queue) ||
1055 !queue_empty(&thread_deallocate_queue) ||
1056 !queue_empty(&turnstile_deallocate_queue))
1057 goto thread_terminate_start;
1058
1059 assert_wait((event_t)&thread_terminate_queue, THREAD_UNINT);
1060 simple_unlock(&thread_terminate_lock);
1061 /* splsched */
1062
1063 self->options &= ~TH_OPT_SYSTEM_CRITICAL;
1064 thread_block((thread_continue_t)thread_terminate_daemon);
1065 /*NOTREACHED*/
1066 }
1067
1068 /*
1069 * thread_terminate_enqueue:
1070 *
1071 * Enqueue a terminating thread for final disposition.
1072 *
1073 * Called at splsched.
1074 */
1075 void
1076 thread_terminate_enqueue(
1077 thread_t thread)
1078 {
1079 KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE, thread->thread_id);
1080
1081 simple_lock(&thread_terminate_lock);
1082 enqueue_tail(&thread_terminate_queue, &thread->runq_links);
1083 simple_unlock(&thread_terminate_lock);
1084
1085 thread_wakeup((event_t)&thread_terminate_queue);
1086 }
1087
1088 /*
1089 * thread_deallocate_enqueue:
1090 *
1091 * Enqueue a thread for final deallocation.
1092 */
1093 static void
1094 thread_deallocate_enqueue(
1095 thread_t thread)
1096 {
1097 spl_t s = splsched();
1098
1099 simple_lock(&thread_terminate_lock);
1100 enqueue_tail(&thread_deallocate_queue, &thread->runq_links);
1101 simple_unlock(&thread_terminate_lock);
1102
1103 thread_wakeup((event_t)&thread_terminate_queue);
1104 splx(s);
1105 }
1106
1107 /*
1108 * turnstile_deallocate_enqueue:
1109 *
1110 * Enqueue a turnstile for final deallocation.
1111 */
1112 void
1113 turnstile_deallocate_enqueue(
1114 struct turnstile *turnstile)
1115 {
1116 spl_t s = splsched();
1117
1118 simple_lock(&thread_terminate_lock);
1119 enqueue_tail(&turnstile_deallocate_queue, &turnstile->ts_deallocate_link);
1120 simple_unlock(&thread_terminate_lock);
1121
1122 thread_wakeup((event_t)&thread_terminate_queue);
1123 splx(s);
1124 }
1125
1126 /*
1127 * workq_deallocate_enqueue:
1128 *
1129 * Enqueue a workqueue for final deallocation.
1130 */
1131 void
1132 workq_deallocate_enqueue(
1133 struct workqueue *wq)
1134 {
1135 spl_t s = splsched();
1136
1137 simple_lock(&thread_terminate_lock);
1138 /*
1139          * This is just to delay a zfree(), so we link the memory without regard
1140          * for the structure's layout.
1141 */
1142 enqueue_tail(&workq_deallocate_queue, (queue_entry_t)wq);
1143 simple_unlock(&thread_terminate_lock);
1144
1145 thread_wakeup((event_t)&thread_terminate_queue);
1146 splx(s);
1147 }
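/*
 * Note on the queues above: the thread terminate, thread deallocate,
 * turnstile deallocate and workq deallocate queues are all protected by
 * thread_terminate_lock and all wake the same event
 * (&thread_terminate_queue). thread_terminate_daemon() therefore re-checks
 * the terminate, deallocate and turnstile queues (the
 * "goto thread_terminate_start" above) before blocking, so a wakeup posted
 * while it was draining one queue is not lost.
 */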
1148
1149 /*
1150 * thread_terminate_crashed_threads:
1151  * Walk the list of crashed threads and move back any threads
1152  * that are no longer being inspected.
1153 */
1154 void
1155 thread_terminate_crashed_threads()
1156 {
1157 thread_t th_remove;
1158 boolean_t should_wake_terminate_queue = FALSE;
1159 spl_t s = splsched();
1160
1161 simple_lock(&thread_terminate_lock);
1162 /*
1163          * Loop through the crashed threads queue and requeue
1164          * any threads that are no longer being inspected.
1165 */
1166
1167 qe_foreach_element_safe(th_remove, &crashed_threads_queue, runq_links) {
1168 /* make sure current_thread is never in crashed queue */
1169 assert(th_remove != current_thread());
1170
1171 if (th_remove->inspection == FALSE) {
1172 re_queue_tail(&thread_terminate_queue, &th_remove->runq_links);
1173 should_wake_terminate_queue = TRUE;
1174 }
1175 }
1176
1177 simple_unlock(&thread_terminate_lock);
1178 splx(s);
1179 if (should_wake_terminate_queue == TRUE) {
1180 thread_wakeup((event_t)&thread_terminate_queue);
1181 }
1182 }
1183
1184 /*
1185 * thread_stack_daemon:
1186 *
1187 * Perform stack allocation as required due to
1188 * invoke failures.
1189 */
1190 static void
1191 thread_stack_daemon(void)
1192 {
1193 thread_t thread;
1194 spl_t s;
1195
1196 s = splsched();
1197 simple_lock(&thread_stack_lock);
1198
1199 while ((thread = qe_dequeue_head(&thread_stack_queue, struct thread, runq_links)) != THREAD_NULL) {
1200 assert_thread_magic(thread);
1201
1202 simple_unlock(&thread_stack_lock);
1203 splx(s);
1204
1205 /* allocate stack with interrupts enabled so that we can call into VM */
1206 stack_alloc(thread);
1207
1208 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_END, thread_tid(thread), 0, 0, 0, 0);
1209
1210 s = splsched();
1211 thread_lock(thread);
1212 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1213 thread_unlock(thread);
1214
1215 simple_lock(&thread_stack_lock);
1216 }
1217
1218 assert_wait((event_t)&thread_stack_queue, THREAD_UNINT);
1219 simple_unlock(&thread_stack_lock);
1220 splx(s);
1221
1222 thread_block((thread_continue_t)thread_stack_daemon);
1223 /*NOTREACHED*/
1224 }
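/*
 * Note on the daemon structure: thread_stack_daemon(), like the terminate and
 * exception daemons, is written in continuation style -- it calls
 * assert_wait() on its queue and then thread_block() with itself as the
 * continuation, so it restarts from the top on each wakeup instead of keeping
 * a kernel stack parked in the middle of the loop.
 */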
1225
1226 /*
1227 * thread_stack_enqueue:
1228 *
1229 * Enqueue a thread for stack allocation.
1230 *
1231 * Called at splsched.
1232 */
1233 void
1234 thread_stack_enqueue(
1235 thread_t thread)
1236 {
1237 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_START, thread_tid(thread), 0, 0, 0, 0);
1238 assert_thread_magic(thread);
1239
1240 simple_lock(&thread_stack_lock);
1241 enqueue_tail(&thread_stack_queue, &thread->runq_links);
1242 simple_unlock(&thread_stack_lock);
1243
1244 thread_wakeup((event_t)&thread_stack_queue);
1245 }
1246
1247 void
1248 thread_daemon_init(void)
1249 {
1250 kern_return_t result;
1251 thread_t thread = NULL;
1252
1253 simple_lock_init(&thread_terminate_lock, 0);
1254 queue_init(&thread_terminate_queue);
1255 queue_init(&thread_deallocate_queue);
1256 queue_init(&workq_deallocate_queue);
1257 queue_init(&turnstile_deallocate_queue);
1258 queue_init(&crashed_threads_queue);
1259
1260 result = kernel_thread_start_priority((thread_continue_t)thread_terminate_daemon, NULL, MINPRI_KERNEL, &thread);
1261 if (result != KERN_SUCCESS)
1262 panic("thread_daemon_init: thread_terminate_daemon");
1263
1264 thread_deallocate(thread);
1265
1266 simple_lock_init(&thread_stack_lock, 0);
1267 queue_init(&thread_stack_queue);
1268
1269 result = kernel_thread_start_priority((thread_continue_t)thread_stack_daemon, NULL, BASEPRI_PREEMPT_HIGH, &thread);
1270 if (result != KERN_SUCCESS)
1271 panic("thread_daemon_init: thread_stack_daemon");
1272
1273 thread_deallocate(thread);
1274
1275 simple_lock_init(&thread_exception_lock, 0);
1276 queue_init(&thread_exception_queue);
1277
1278 result = kernel_thread_start_priority((thread_continue_t)thread_exception_daemon, NULL, MINPRI_KERNEL, &thread);
1279 if (result != KERN_SUCCESS)
1280 panic("thread_daemon_init: thread_exception_daemon");
1281
1282 thread_deallocate(thread);
1283 }
1284
1285 #define TH_OPTION_NONE 0x00
1286 #define TH_OPTION_NOCRED 0x01
1287 #define TH_OPTION_NOSUSP 0x02
1288 #define TH_OPTION_WORKQ 0x04
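/*
 * Example combination (see thread_create_workq_waiting() below): workqueue
 * threads are created with TH_OPTION_NOCRED | TH_OPTION_NOSUSP |
 * TH_OPTION_WORKQ: NOCRED is passed through to uthread_alloc(), NOSUSP makes
 * creation fail while the parent task is suspended, and WORKQ routes the new
 * thread through workq_thread_init_and_wq_lock() before it parks.
 */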
1289
1290 /*
1291 * Create a new thread.
1292 * Doesn't start the thread running.
1293 *
1294 * Task and tasks_threads_lock are returned locked on success.
1295 */
1296 static kern_return_t
1297 thread_create_internal(
1298 task_t parent_task,
1299 integer_t priority,
1300 thread_continue_t continuation,
1301 void *parameter,
1302 int options,
1303 thread_t *out_thread)
1304 {
1305 thread_t new_thread;
1306 static thread_t first_thread;
1307
1308 /*
1309 * Allocate a thread and initialize static fields
1310 */
1311 if (first_thread == THREAD_NULL)
1312 new_thread = first_thread = current_thread();
1313 else
1314 new_thread = (thread_t)zalloc(thread_zone);
1315 if (new_thread == THREAD_NULL)
1316 return (KERN_RESOURCE_SHORTAGE);
1317
1318 if (new_thread != first_thread)
1319 *new_thread = thread_template;
1320
1321 os_ref_init_count(&new_thread->ref_count, &thread_refgrp, 2);
1322
1323 #ifdef MACH_BSD
1324 new_thread->uthread = uthread_alloc(parent_task, new_thread, (options & TH_OPTION_NOCRED) != 0);
1325 if (new_thread->uthread == NULL) {
1326 #if MACH_ASSERT
1327 new_thread->thread_magic = 0;
1328 #endif /* MACH_ASSERT */
1329
1330 zfree(thread_zone, new_thread);
1331 return (KERN_RESOURCE_SHORTAGE);
1332 }
1333 #endif /* MACH_BSD */
1334
1335 if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
1336 #ifdef MACH_BSD
1337 void *ut = new_thread->uthread;
1338
1339 new_thread->uthread = NULL;
1340 /* cred free may not be necessary */
1341 uthread_cleanup(parent_task, ut, parent_task->bsd_info);
1342 uthread_cred_free(ut);
1343 uthread_zone_free(ut);
1344 #endif /* MACH_BSD */
1345
1346 #if MACH_ASSERT
1347 new_thread->thread_magic = 0;
1348 #endif /* MACH_ASSERT */
1349
1350 zfree(thread_zone, new_thread);
1351 return (KERN_FAILURE);
1352 }
1353
1354 new_thread->task = parent_task;
1355
1356 thread_lock_init(new_thread);
1357 wake_lock_init(new_thread);
1358
1359 lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr);
1360
1361 ipc_thread_init(new_thread);
1362
1363 new_thread->continuation = continuation;
1364 new_thread->parameter = parameter;
1365 new_thread->inheritor_flags = TURNSTILE_UPDATE_FLAGS_NONE;
1366 priority_queue_init(&new_thread->inheritor_queue,
1367 PRIORITY_QUEUE_BUILTIN_MAX_HEAP);
1368
1369 /* Allocate I/O Statistics structure */
1370 new_thread->thread_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1371 assert(new_thread->thread_io_stats != NULL);
1372 bzero(new_thread->thread_io_stats, sizeof(struct io_stat_info));
1373 new_thread->sync_ipc_overrides = 0;
1374
1375 #if KASAN
1376 kasan_init_thread(&new_thread->kasan_data);
1377 #endif
1378
1379 #if CONFIG_IOSCHED
1380 /* Clear out the I/O Scheduling info for AppleFSCompression */
1381 new_thread->decmp_upl = NULL;
1382 #endif /* CONFIG_IOSCHED */
1383
1384 #if DEVELOPMENT || DEBUG
1385 task_lock(parent_task);
1386 uint16_t thread_limit = parent_task->task_thread_limit;
1387 if (exc_resource_threads_enabled &&
1388 thread_limit > 0 &&
1389 parent_task->thread_count >= thread_limit &&
1390 !parent_task->task_has_crossed_thread_limit &&
1391 !(parent_task->t_flags & TF_CORPSE)) {
1392 int thread_count = parent_task->thread_count;
1393 parent_task->task_has_crossed_thread_limit = TRUE;
1394 task_unlock(parent_task);
1395 SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(parent_task, thread_count);
1396 }
1397 else {
1398 task_unlock(parent_task);
1399 }
1400 #endif
1401
1402 lck_mtx_lock(&tasks_threads_lock);
1403 task_lock(parent_task);
1404
1405 /*
1406 * Fail thread creation if parent task is being torn down or has too many threads
1407 * If the caller asked for TH_OPTION_NOSUSP, also fail if the parent task is suspended
1408 */
1409 if (parent_task->active == 0 || parent_task->halting ||
1410 (parent_task->suspend_count > 0 && (options & TH_OPTION_NOSUSP) != 0) ||
1411 (parent_task->thread_count >= task_threadmax && parent_task != kernel_task)) {
1412 task_unlock(parent_task);
1413 lck_mtx_unlock(&tasks_threads_lock);
1414
1415 #ifdef MACH_BSD
1416 {
1417 void *ut = new_thread->uthread;
1418
1419 new_thread->uthread = NULL;
1420 uthread_cleanup(parent_task, ut, parent_task->bsd_info);
1421 /* cred free may not be necessary */
1422 uthread_cred_free(ut);
1423 uthread_zone_free(ut);
1424 }
1425 #endif /* MACH_BSD */
1426 ipc_thread_disable(new_thread);
1427 ipc_thread_terminate(new_thread);
1428 kfree(new_thread->thread_io_stats, sizeof(struct io_stat_info));
1429 lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp);
1430 machine_thread_destroy(new_thread);
1431 zfree(thread_zone, new_thread);
1432 return (KERN_FAILURE);
1433 }
1434
1435 /* New threads inherit any default state on the task */
1436 machine_thread_inherit_taskwide(new_thread, parent_task);
1437
1438 task_reference_internal(parent_task);
1439
1440 if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
1441 /*
1442 * This task has a per-thread CPU limit; make sure this new thread
1443 * gets its limit set too, before it gets out of the kernel.
1444 */
1445 act_set_astledger(new_thread);
1446 }
1447
1448 /* Instantiate a thread ledger. Do not fail thread creation if ledger creation fails. */
1449 if ((new_thread->t_threadledger = ledger_instantiate(thread_ledger_template,
1450 LEDGER_CREATE_INACTIVE_ENTRIES)) != LEDGER_NULL) {
1451
1452 ledger_entry_setactive(new_thread->t_threadledger, thread_ledgers.cpu_time);
1453 }
1454
1455 new_thread->t_bankledger = LEDGER_NULL;
1456 new_thread->t_deduct_bank_ledger_time = 0;
1457 new_thread->t_deduct_bank_ledger_energy = 0;
1458
1459 new_thread->t_ledger = new_thread->task->ledger;
1460 if (new_thread->t_ledger)
1461 ledger_reference(new_thread->t_ledger);
1462
1463 #if defined(CONFIG_SCHED_MULTIQ)
1464 /* Cache the task's sched_group */
1465 new_thread->sched_group = parent_task->sched_group;
1466 #endif /* defined(CONFIG_SCHED_MULTIQ) */
1467
1468 /* Cache the task's map */
1469 new_thread->map = parent_task->map;
1470
1471 timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
1472 timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);
1473
1474 #if KPC
1475 kpc_thread_create(new_thread);
1476 #endif
1477
1478 /* Set the thread's scheduling parameters */
1479 new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
1480 new_thread->max_priority = parent_task->max_priority;
1481 new_thread->task_priority = parent_task->priority;
1482
1483         int new_priority = (priority < 0) ? parent_task->priority : priority;
1485 if (new_priority > new_thread->max_priority)
1486 new_priority = new_thread->max_priority;
1487 #if CONFIG_EMBEDDED
1488 if (new_priority < MAXPRI_THROTTLE) {
1489 new_priority = MAXPRI_THROTTLE;
1490 }
1491 #endif /* CONFIG_EMBEDDED */
1492
1493 new_thread->importance = new_priority - new_thread->task_priority;
1494
1495 sched_set_thread_base_priority(new_thread, new_priority);
1496
1497 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
1498 new_thread->sched_stamp = sched_tick;
1499 new_thread->pri_shift = sched_pri_shifts[new_thread->th_sched_bucket];
1500 #endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) */
1501
1502 #if CONFIG_EMBEDDED
1503 if (parent_task->max_priority <= MAXPRI_THROTTLE)
1504 sched_thread_mode_demote(new_thread, TH_SFLAG_THROTTLED);
1505 #endif /* CONFIG_EMBEDDED */
1506
1507 thread_policy_create(new_thread);
1508
1509 /* Chain the thread onto the task's list */
1510 queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
1511 parent_task->thread_count++;
1512
1513 /* So terminating threads don't need to take the task lock to decrement */
1514 hw_atomic_add(&parent_task->active_thread_count, 1);
1515
1516 /* Protected by the tasks_threads_lock */
1517 new_thread->thread_id = ++thread_unique_id;
1518
1519
1520 queue_enter(&threads, new_thread, thread_t, threads);
1521 threads_count++;
1522
1523 new_thread->active = TRUE;
1524 if (task_is_a_corpse_fork(parent_task)) {
1525 /* Set the inspection bit if the task is a corpse fork */
1526 new_thread->inspection = TRUE;
1527 } else {
1528 new_thread->inspection = FALSE;
1529 }
1530 new_thread->corpse_dup = FALSE;
1531 new_thread->turnstile = turnstile_alloc();
1532 *out_thread = new_thread;
1533
1534 if (kdebug_enable) {
1535 long args[4] = {};
1536
1537 kdbg_trace_data(parent_task->bsd_info, &args[1], &args[3]);
1538
1539 /*
1540 * Starting with 26604425, exec'ing creates a new task/thread.
1541 *
1542 * NEWTHREAD in the current process has two possible meanings:
1543 *
1544 * 1) Create a new thread for this process.
1545 * 2) Create a new thread for the future process this will become in an
1546 * exec.
1547 *
1548 * To disambiguate these, arg3 will be set to TRUE for case #2.
1549 *
1550 * The value we need to find (TPF_EXEC_COPY) is stable in the case of a
1551 * task exec'ing. The read of t_procflags does not take the proc_lock.
1552 */
1553 args[2] = task_is_exec_copy(parent_task) ? 1 : 0;
1554
1555 KDBG_RELEASE(TRACE_DATA_NEWTHREAD, (uintptr_t)thread_tid(new_thread),
1556 args[1], args[2], args[3]);
1557
1558 kdbg_trace_string(parent_task->bsd_info, &args[0], &args[1],
1559 &args[2], &args[3]);
1560 KDBG_RELEASE(TRACE_STRING_NEWTHREAD, args[0], args[1], args[2],
1561 args[3]);
1562 }
1563
1564 DTRACE_PROC1(lwp__create, thread_t, *out_thread);
1565
1566 return (KERN_SUCCESS);
1567 }
1568
1569 static kern_return_t
1570 thread_create_internal2(
1571 task_t task,
1572 thread_t *new_thread,
1573 boolean_t from_user,
1574 thread_continue_t continuation)
1575 {
1576 kern_return_t result;
1577 thread_t thread;
1578
1579 if (task == TASK_NULL || task == kernel_task)
1580 return (KERN_INVALID_ARGUMENT);
1581
1582 result = thread_create_internal(task, -1, continuation, NULL, TH_OPTION_NONE, &thread);
1583 if (result != KERN_SUCCESS)
1584 return (result);
1585
1586 thread->user_stop_count = 1;
1587 thread_hold(thread);
1588 if (task->suspend_count > 0)
1589 thread_hold(thread);
1590
1591 if (from_user)
1592 extmod_statistics_incr_thread_create(task);
1593
1594 task_unlock(task);
1595 lck_mtx_unlock(&tasks_threads_lock);
1596
1597 *new_thread = thread;
1598
1599 return (KERN_SUCCESS);
1600 }
1601
1602 /* No prototype, since task_server.h has the _from_user version if KERNEL_SERVER */
1603 kern_return_t
1604 thread_create(
1605 task_t task,
1606 thread_t *new_thread);
1607
1608 kern_return_t
1609 thread_create(
1610 task_t task,
1611 thread_t *new_thread)
1612 {
1613 return thread_create_internal2(task, new_thread, FALSE, (thread_continue_t)thread_bootstrap_return);
1614 }
1615
1616 kern_return_t
1617 thread_create_from_user(
1618 task_t task,
1619 thread_t *new_thread)
1620 {
1621 return thread_create_internal2(task, new_thread, TRUE, (thread_continue_t)thread_bootstrap_return);
1622 }
1623
1624 kern_return_t
1625 thread_create_with_continuation(
1626 task_t task,
1627 thread_t *new_thread,
1628 thread_continue_t continuation)
1629 {
1630 return thread_create_internal2(task, new_thread, FALSE, continuation);
1631 }
1632
1633 /*
1634 * Create a thread that is already started, but is waiting on an event
1635 */
1636 static kern_return_t
1637 thread_create_waiting_internal(
1638 task_t task,
1639 thread_continue_t continuation,
1640 event_t event,
1641 block_hint_t block_hint,
1642 int options,
1643 thread_t *new_thread)
1644 {
1645 kern_return_t result;
1646 thread_t thread;
1647
1648 if (task == TASK_NULL || task == kernel_task)
1649 return (KERN_INVALID_ARGUMENT);
1650
1651 result = thread_create_internal(task, -1, continuation, NULL,
1652 options, &thread);
1653 if (result != KERN_SUCCESS)
1654 return (result);
1655
1656 /* note no user_stop_count or thread_hold here */
1657
1658 if (task->suspend_count > 0)
1659 thread_hold(thread);
1660
1661 thread_mtx_lock(thread);
1662 thread_set_pending_block_hint(thread, block_hint);
1663 if (options & TH_OPTION_WORKQ) {
1664 thread->static_param = true;
1665 event = workq_thread_init_and_wq_lock(task, thread);
1666 }
1667 thread_start_in_assert_wait(thread, event, THREAD_INTERRUPTIBLE);
1668 thread_mtx_unlock(thread);
1669
1670 task_unlock(task);
1671 lck_mtx_unlock(&tasks_threads_lock);
1672
1673 *new_thread = thread;
1674
1675 return (KERN_SUCCESS);
1676 }
1677
1678 kern_return_t
1679 thread_create_waiting(
1680 task_t task,
1681 thread_continue_t continuation,
1682 event_t event,
1683 thread_t *new_thread)
1684 {
1685 return thread_create_waiting_internal(task, continuation, event,
1686 kThreadWaitNone, TH_OPTION_NONE, new_thread);
1687 }
1688
1689
1690 static kern_return_t
1691 thread_create_running_internal2(
1692 task_t task,
1693 int flavor,
1694 thread_state_t new_state,
1695 mach_msg_type_number_t new_state_count,
1696 thread_t *new_thread,
1697 boolean_t from_user)
1698 {
1699 kern_return_t result;
1700 thread_t thread;
1701
1702 if (task == TASK_NULL || task == kernel_task)
1703 return (KERN_INVALID_ARGUMENT);
1704
1705 result = thread_create_internal(task, -1,
1706 (thread_continue_t)thread_bootstrap_return, NULL,
1707 TH_OPTION_NONE, &thread);
1708 if (result != KERN_SUCCESS)
1709 return (result);
1710
1711 if (task->suspend_count > 0)
1712 thread_hold(thread);
1713
1714 if (from_user) {
1715 result = machine_thread_state_convert_from_user(thread, flavor,
1716 new_state, new_state_count);
1717 }
1718 if (result == KERN_SUCCESS) {
1719 result = machine_thread_set_state(thread, flavor, new_state,
1720 new_state_count);
1721 }
1722 if (result != KERN_SUCCESS) {
1723 task_unlock(task);
1724 lck_mtx_unlock(&tasks_threads_lock);
1725
1726 thread_terminate(thread);
1727 thread_deallocate(thread);
1728 return (result);
1729 }
1730
1731 thread_mtx_lock(thread);
1732 thread_start(thread);
1733 thread_mtx_unlock(thread);
1734
1735 if (from_user)
1736 extmod_statistics_incr_thread_create(task);
1737
1738 task_unlock(task);
1739 lck_mtx_unlock(&tasks_threads_lock);
1740
1741 *new_thread = thread;
1742
1743 return (result);
1744 }
1745
1746 /* Prototype, see justification above */
1747 kern_return_t
1748 thread_create_running(
1749 task_t task,
1750 int flavor,
1751 thread_state_t new_state,
1752 mach_msg_type_number_t new_state_count,
1753 thread_t *new_thread);
1754
1755 kern_return_t
1756 thread_create_running(
1757 task_t task,
1758 int flavor,
1759 thread_state_t new_state,
1760 mach_msg_type_number_t new_state_count,
1761 thread_t *new_thread)
1762 {
1763 return thread_create_running_internal2(
1764 task, flavor, new_state, new_state_count,
1765 new_thread, FALSE);
1766 }
1767
1768 kern_return_t
1769 thread_create_running_from_user(
1770 task_t task,
1771 int flavor,
1772 thread_state_t new_state,
1773 mach_msg_type_number_t new_state_count,
1774 thread_t *new_thread)
1775 {
1776 return thread_create_running_internal2(
1777 task, flavor, new_state, new_state_count,
1778 new_thread, TRUE);
1779 }
1780
1781 kern_return_t
1782 thread_create_workq_waiting(
1783 task_t task,
1784 thread_continue_t continuation,
1785 thread_t *new_thread)
1786 {
1787 int options = TH_OPTION_NOCRED | TH_OPTION_NOSUSP | TH_OPTION_WORKQ;
1788 return thread_create_waiting_internal(task, continuation, NULL,
1789 kThreadWaitParkedWorkQueue, options, new_thread);
1790 }
1791
1792 /*
1793 * kernel_thread_create:
1794 *
1795 * Create a thread in the kernel task
1796 * to execute in kernel context.
1797 */
1798 kern_return_t
1799 kernel_thread_create(
1800 thread_continue_t continuation,
1801 void *parameter,
1802 integer_t priority,
1803 thread_t *new_thread)
1804 {
1805 kern_return_t result;
1806 thread_t thread;
1807 task_t task = kernel_task;
1808
1809 result = thread_create_internal(task, priority, continuation, parameter,
1810 TH_OPTION_NOCRED | TH_OPTION_NONE, &thread);
1811 if (result != KERN_SUCCESS)
1812 return (result);
1813
1814 task_unlock(task);
1815 lck_mtx_unlock(&tasks_threads_lock);
1816
1817 stack_alloc(thread);
1818 assert(thread->kernel_stack != 0);
1819 #if CONFIG_EMBEDDED
1820 if (priority > BASEPRI_KERNEL)
1821 #endif
1822 thread->reserved_stack = thread->kernel_stack;
1823
1824 if(debug_task & 1)
1825 kprintf("kernel_thread_create: thread = %p continuation = %p\n", thread, continuation);
1826 *new_thread = thread;
1827
1828 return (result);
1829 }
1830
1831 kern_return_t
1832 kernel_thread_start_priority(
1833 thread_continue_t continuation,
1834 void *parameter,
1835 integer_t priority,
1836 thread_t *new_thread)
1837 {
1838 kern_return_t result;
1839 thread_t thread;
1840
1841 result = kernel_thread_create(continuation, parameter, priority, &thread);
1842 if (result != KERN_SUCCESS)
1843 return (result);
1844
1845 *new_thread = thread;
1846
1847 thread_mtx_lock(thread);
1848 thread_start(thread);
1849 thread_mtx_unlock(thread);
1850
1851 return (result);
1852 }
1853
1854 kern_return_t
1855 kernel_thread_start(
1856 thread_continue_t continuation,
1857 void *parameter,
1858 thread_t *new_thread)
1859 {
1860 return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
1861 }
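/*
 * Usage sketch (illustrative only; "my_daemon" is a hypothetical continuation,
 * not part of this file). Callers receive a reference on the new thread and
 * typically drop it right away, as thread_daemon_init() above does:
 *
 *	static void my_daemon(void *param, wait_result_t wr);
 *
 *	thread_t thread;
 *	if (kernel_thread_start((thread_continue_t)my_daemon, NULL,
 *	    &thread) == KERN_SUCCESS) {
 *		thread_deallocate(thread);
 *	}
 */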
1862
1863 /* Separated into helper function so it can be used by THREAD_BASIC_INFO and THREAD_EXTENDED_INFO */
1864 /* it is assumed that the thread is locked by the caller */
1865 static void
1866 retrieve_thread_basic_info(thread_t thread, thread_basic_info_t basic_info)
1867 {
1868 int state, flags;
1869
1870 /* fill in info */
1871
1872 thread_read_times(thread, &basic_info->user_time,
1873 &basic_info->system_time, NULL);
1874
1875 /*
1876 * Update lazy-evaluated scheduler info because someone wants it.
1877 */
1878 if (SCHED(can_update_priority)(thread))
1879 SCHED(update_priority)(thread);
1880
1881 basic_info->sleep_time = 0;
1882
1883 /*
1884 * To calculate cpu_usage, first correct for timer rate,
1885 * then for 5/8 ageing. The correction factor [3/5] is
1886 * (1/(5/8) - 1).
1887 */
1888 basic_info->cpu_usage = 0;
1889 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
1890 if (sched_tick_interval) {
1891 basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
1892 * TH_USAGE_SCALE) / sched_tick_interval);
1893 basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
1894 }
1895 #endif
1896
1897 if (basic_info->cpu_usage > TH_USAGE_SCALE)
1898 basic_info->cpu_usage = TH_USAGE_SCALE;
1899
1900 basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
1901 POLICY_TIMESHARE: POLICY_RR);
1902
1903 flags = 0;
1904 if (thread->options & TH_OPT_IDLE_THREAD)
1905 flags |= TH_FLAGS_IDLE;
1906
1907 if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
1908 flags |= TH_FLAGS_GLOBAL_FORCED_IDLE;
1909 }
1910
1911 if (!thread->kernel_stack)
1912 flags |= TH_FLAGS_SWAPPED;
1913
1914 state = 0;
1915 if (thread->state & TH_TERMINATE)
1916 state = TH_STATE_HALTED;
1917 else
1918 if (thread->state & TH_RUN)
1919 state = TH_STATE_RUNNING;
1920 else
1921 if (thread->state & TH_UNINT)
1922 state = TH_STATE_UNINTERRUPTIBLE;
1923 else
1924 if (thread->state & TH_SUSP)
1925 state = TH_STATE_STOPPED;
1926 else
1927 if (thread->state & TH_WAIT)
1928 state = TH_STATE_WAITING;
1929
1930 basic_info->run_state = state;
1931 basic_info->flags = flags;
1932
1933 basic_info->suspend_count = thread->user_stop_count;
1934
1935 return;
1936 }
1937
1938 kern_return_t
1939 thread_info_internal(
1940 thread_t thread,
1941 thread_flavor_t flavor,
1942 thread_info_t thread_info_out, /* ptr to OUT array */
1943 mach_msg_type_number_t *thread_info_count) /*IN/OUT*/
1944 {
1945 spl_t s;
1946
1947 if (thread == THREAD_NULL)
1948 return (KERN_INVALID_ARGUMENT);
1949
1950 if (flavor == THREAD_BASIC_INFO) {
1951
1952 if (*thread_info_count < THREAD_BASIC_INFO_COUNT)
1953 return (KERN_INVALID_ARGUMENT);
1954
1955 s = splsched();
1956 thread_lock(thread);
1957
1958 retrieve_thread_basic_info(thread, (thread_basic_info_t) thread_info_out);
1959
1960 thread_unlock(thread);
1961 splx(s);
1962
1963 *thread_info_count = THREAD_BASIC_INFO_COUNT;
1964
1965 return (KERN_SUCCESS);
1966 }
1967 else
1968 if (flavor == THREAD_IDENTIFIER_INFO) {
1969 thread_identifier_info_t identifier_info;
1970
1971 if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT)
1972 return (KERN_INVALID_ARGUMENT);
1973
1974 identifier_info = (thread_identifier_info_t) thread_info_out;
1975
1976 s = splsched();
1977 thread_lock(thread);
1978
1979 identifier_info->thread_id = thread->thread_id;
1980 identifier_info->thread_handle = thread->machine.cthread_self;
1981 identifier_info->dispatch_qaddr = thread_dispatchqaddr(thread);
1982
1983 thread_unlock(thread);
1984 splx(s);
1985 return KERN_SUCCESS;
1986 }
1987 else
1988 if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
1989 policy_timeshare_info_t ts_info;
1990
1991 if (*thread_info_count < POLICY_TIMESHARE_INFO_COUNT)
1992 return (KERN_INVALID_ARGUMENT);
1993
1994 ts_info = (policy_timeshare_info_t)thread_info_out;
1995
1996 s = splsched();
1997 thread_lock(thread);
1998
1999 if (thread->sched_mode != TH_MODE_TIMESHARE) {
2000 thread_unlock(thread);
2001 splx(s);
2002 return (KERN_INVALID_POLICY);
2003 }
2004
2005 ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
2006 if (ts_info->depressed) {
2007 ts_info->base_priority = DEPRESSPRI;
2008 ts_info->depress_priority = thread->base_pri;
2009 }
2010 else {
2011 ts_info->base_priority = thread->base_pri;
2012 ts_info->depress_priority = -1;
2013 }
2014
2015 ts_info->cur_priority = thread->sched_pri;
2016 ts_info->max_priority = thread->max_priority;
2017
2018 thread_unlock(thread);
2019 splx(s);
2020
2021 *thread_info_count = POLICY_TIMESHARE_INFO_COUNT;
2022
2023 return (KERN_SUCCESS);
2024 }
2025 else
2026 if (flavor == THREAD_SCHED_FIFO_INFO) {
2027 if (*thread_info_count < POLICY_FIFO_INFO_COUNT)
2028 return (KERN_INVALID_ARGUMENT);
2029
2030 return (KERN_INVALID_POLICY);
2031 }
2032 else
2033 if (flavor == THREAD_SCHED_RR_INFO) {
2034 policy_rr_info_t rr_info;
2035 uint32_t quantum_time;
2036 uint64_t quantum_ns;
2037
2038 if (*thread_info_count < POLICY_RR_INFO_COUNT)
2039 return (KERN_INVALID_ARGUMENT);
2040
2041 rr_info = (policy_rr_info_t) thread_info_out;
2042
2043 s = splsched();
2044 thread_lock(thread);
2045
2046 if (thread->sched_mode == TH_MODE_TIMESHARE) {
2047 thread_unlock(thread);
2048 splx(s);
2049
2050 return (KERN_INVALID_POLICY);
2051 }
2052
2053 rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
2054 if (rr_info->depressed) {
2055 rr_info->base_priority = DEPRESSPRI;
2056 rr_info->depress_priority = thread->base_pri;
2057 }
2058 else {
2059 rr_info->base_priority = thread->base_pri;
2060 rr_info->depress_priority = -1;
2061 }
2062
2063 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
2064 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
2065
2066 rr_info->max_priority = thread->max_priority;
2067 rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
2068
2069 thread_unlock(thread);
2070 splx(s);
2071
2072 *thread_info_count = POLICY_RR_INFO_COUNT;
2073
2074 return (KERN_SUCCESS);
2075 }
2076 else
2077 if (flavor == THREAD_EXTENDED_INFO) {
2078 thread_basic_info_data_t basic_info;
2079 thread_extended_info_t extended_info = (thread_extended_info_t) thread_info_out;
2080
2081 if (*thread_info_count < THREAD_EXTENDED_INFO_COUNT) {
2082 return (KERN_INVALID_ARGUMENT);
2083 }
2084
2085 s = splsched();
2086 thread_lock(thread);
2087
2088 /* NOTE: This mimics fill_taskthreadinfo(), which is the function used by proc_pidinfo() for
2089 * the PROC_PIDTHREADINFO flavor (which can't be used on corpses)
2090 */
2091 retrieve_thread_basic_info(thread, &basic_info);
2092 extended_info->pth_user_time = ((basic_info.user_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.user_time.microseconds * (integer_t)NSEC_PER_USEC));
2093 extended_info->pth_system_time = ((basic_info.system_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.system_time.microseconds * (integer_t)NSEC_PER_USEC));
2094
2095 extended_info->pth_cpu_usage = basic_info.cpu_usage;
2096 extended_info->pth_policy = basic_info.policy;
2097 extended_info->pth_run_state = basic_info.run_state;
2098 extended_info->pth_flags = basic_info.flags;
2099 extended_info->pth_sleep_time = basic_info.sleep_time;
2100 extended_info->pth_curpri = thread->sched_pri;
2101 extended_info->pth_priority = thread->base_pri;
2102 extended_info->pth_maxpriority = thread->max_priority;
2103
2104 bsd_getthreadname(thread->uthread, extended_info->pth_name);
2105
2106 thread_unlock(thread);
2107 splx(s);
2108
2109 *thread_info_count = THREAD_EXTENDED_INFO_COUNT;
2110
2111 return (KERN_SUCCESS);
2112 }
2113 else
2114 if (flavor == THREAD_DEBUG_INFO_INTERNAL) {
2115 #if DEVELOPMENT || DEBUG
2116 thread_debug_info_internal_t dbg_info;
2117 if (*thread_info_count < THREAD_DEBUG_INFO_INTERNAL_COUNT)
2118 return (KERN_NOT_SUPPORTED);
2119
2120 if (thread_info_out == NULL)
2121 return (KERN_INVALID_ARGUMENT);
2122
2123 dbg_info = (thread_debug_info_internal_t) thread_info_out;
2124 dbg_info->page_creation_count = thread->t_page_creation_count;
2125
2126 *thread_info_count = THREAD_DEBUG_INFO_INTERNAL_COUNT;
2127 return (KERN_SUCCESS);
2128 #endif /* DEVELOPMENT || DEBUG */
2129 return (KERN_NOT_SUPPORTED);
2130 }
2131
2132 return (KERN_INVALID_ARGUMENT);
2133 }
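
/*
 * Editorial note -- illustrative user-space caller of the MIG routine that
 * lands here (sketch only; not part of the original file):
 *
 *	thread_basic_info_data_t info;
 *	mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
 *	kern_return_t kr = thread_info(mach_thread_self(), THREAD_BASIC_INFO,
 *	    (thread_info_t)&info, &count);
 */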
2134
2135 void
2136 thread_read_times(
2137 thread_t thread,
2138 time_value_t *user_time,
2139 time_value_t *system_time,
2140 time_value_t *runnable_time)
2141 {
2142 clock_sec_t secs;
2143 clock_usec_t usecs;
2144 uint64_t tval_user, tval_system;
2145
2146 tval_user = timer_grab(&thread->user_timer);
2147 tval_system = timer_grab(&thread->system_timer);
2148
2149 if (thread->precise_user_kernel_time) {
2150 absolutetime_to_microtime(tval_user, &secs, &usecs);
2151 user_time->seconds = (typeof(user_time->seconds))secs;
2152 user_time->microseconds = usecs;
2153
2154 absolutetime_to_microtime(tval_system, &secs, &usecs);
2155 system_time->seconds = (typeof(system_time->seconds))secs;
2156 system_time->microseconds = usecs;
2157 } else {
2158 /* system_timer may represent either sys or user */
2159 tval_user += tval_system;
2160 absolutetime_to_microtime(tval_user, &secs, &usecs);
2161 user_time->seconds = (typeof(user_time->seconds))secs;
2162 user_time->microseconds = usecs;
2163
2164 system_time->seconds = 0;
2165 system_time->microseconds = 0;
2166 }
2167
2168 if (runnable_time) {
2169 uint64_t tval_runnable = timer_grab(&thread->runnable_timer);
2170 absolutetime_to_microtime(tval_runnable, &secs, &usecs);
2171 runnable_time->seconds = (typeof(runnable_time->seconds))secs;
2172 runnable_time->microseconds = usecs;
2173 }
2174 }
2175
2176 uint64_t thread_get_runtime_self(void)
2177 {
2178 boolean_t interrupt_state;
2179 uint64_t runtime;
2180 thread_t thread = NULL;
2181 processor_t processor = NULL;
2182
2183 thread = current_thread();
2184
2185 /* Not interrupt safe, as the scheduler may otherwise update timer values underneath us */
2186 interrupt_state = ml_set_interrupts_enabled(FALSE);
2187 processor = current_processor();
2188 timer_update(PROCESSOR_DATA(processor, thread_timer), mach_absolute_time());
2189 runtime = (timer_grab(&thread->user_timer) + timer_grab(&thread->system_timer));
2190 ml_set_interrupts_enabled(interrupt_state);
2191
2192 return runtime;
2193 }
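
/*
 * Editorial note -- illustrative use (sketch): the return value is the calling
 * thread's combined user+system CPU time in mach absolute time units, so a
 * caller can bracket a region with two calls and subtract:
 *
 *	uint64_t t0 = thread_get_runtime_self();
 *	... work ...
 *	uint64_t cpu_delta_abs = thread_get_runtime_self() - t0;
 */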
2194
2195 kern_return_t
2196 thread_assign(
2197 __unused thread_t thread,
2198 __unused processor_set_t new_pset)
2199 {
2200 return (KERN_FAILURE);
2201 }
2202
2203 /*
2204 * thread_assign_default:
2205 *
2206 * Special version of thread_assign for assigning threads to default
2207 * processor set.
2208 */
2209 kern_return_t
2210 thread_assign_default(
2211 thread_t thread)
2212 {
2213 return (thread_assign(thread, &pset0));
2214 }
2215
2216 /*
2217 * thread_get_assignment
2218 *
2219 * Return current assignment for this thread.
2220 */
2221 kern_return_t
2222 thread_get_assignment(
2223 thread_t thread,
2224 processor_set_t *pset)
2225 {
2226 if (thread == NULL)
2227 return (KERN_INVALID_ARGUMENT);
2228
2229 *pset = &pset0;
2230
2231 return (KERN_SUCCESS);
2232 }
2233
2234 /*
2235 * thread_wire_internal:
2236 *
2237 * Specify that the target thread must always be able
2238 * to run and to allocate memory.
2239 */
2240 kern_return_t
2241 thread_wire_internal(
2242 host_priv_t host_priv,
2243 thread_t thread,
2244 boolean_t wired,
2245 boolean_t *prev_state)
2246 {
2247 if (host_priv == NULL || thread != current_thread())
2248 return (KERN_INVALID_ARGUMENT);
2249
2250 assert(host_priv == &realhost);
2251
2252 if (prev_state)
2253 *prev_state = (thread->options & TH_OPT_VMPRIV) != 0;
2254
2255 if (wired) {
2256 if (!(thread->options & TH_OPT_VMPRIV))
2257 vm_page_free_reserve(1); /* XXX */
2258 thread->options |= TH_OPT_VMPRIV;
2259 }
2260 else {
2261 if (thread->options & TH_OPT_VMPRIV)
2262 vm_page_free_reserve(-1); /* XXX */
2263 thread->options &= ~TH_OPT_VMPRIV;
2264 }
2265
2266 return (KERN_SUCCESS);
2267 }
2268
2269
2270 /*
2271 * thread_wire:
2272 *
2273 * User-api wrapper for thread_wire_internal()
2274 */
2275 kern_return_t
2276 thread_wire(
2277 host_priv_t host_priv,
2278 thread_t thread,
2279 boolean_t wired)
2280 {
2281 return (thread_wire_internal(host_priv, thread, wired, NULL));
2282 }
2283
2284
2285 boolean_t
2286 is_vm_privileged(void)
2287 {
2288 return current_thread()->options & TH_OPT_VMPRIV ? TRUE : FALSE;
2289 }
2290
2291 boolean_t
2292 set_vm_privilege(boolean_t privileged)
2293 {
2294 boolean_t was_vmpriv;
2295
2296 if (current_thread()->options & TH_OPT_VMPRIV)
2297 was_vmpriv = TRUE;
2298 else
2299 was_vmpriv = FALSE;
2300
2301 if (privileged != FALSE)
2302 current_thread()->options |= TH_OPT_VMPRIV;
2303 else
2304 current_thread()->options &= ~TH_OPT_VMPRIV;
2305
2306 return (was_vmpriv);
2307 }
2308
2309 void
2310 set_thread_rwlock_boost(void)
2311 {
2312 current_thread()->rwlock_count++;
2313 }
2314
2315 void
2316 clear_thread_rwlock_boost(void)
2317 {
2318 thread_t thread = current_thread();
2319
2320 if ((thread->rwlock_count-- == 1) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2321
2322 lck_rw_clear_promotion(thread, 0);
2323 }
2324 }
2325
2326
2327 /*
2328 * XXX assuming current thread only, for now...
2329 */
2330 void
2331 thread_guard_violation(thread_t thread,
2332 mach_exception_data_type_t code, mach_exception_data_type_t subcode)
2333 {
2334 assert(thread == current_thread());
2335
2336 /* don't set up the AST for kernel threads */
2337 if (thread->task == kernel_task)
2338 return;
2339
2340 spl_t s = splsched();
2341 /*
2342 * Use the saved state area of the thread structure
2343 * to store all info required to handle the AST when
2344 * returning to userspace
2345 */
2346 assert(EXC_GUARD_DECODE_GUARD_TYPE(code));
2347 thread->guard_exc_info.code = code;
2348 thread->guard_exc_info.subcode = subcode;
2349 thread_ast_set(thread, AST_GUARD);
2350 ast_propagate(thread);
2351
2352 splx(s);
2353 }
2354
2355 /*
2356 * guard_ast:
2357 *
2358 * Handle AST_GUARD for a thread. This routine looks at the
2359 * state saved in the thread structure to determine the cause
2360 * of this exception. Based on this value, it invokes the
2361 * appropriate routine which determines other exception related
2362 * info and raises the exception.
2363 */
2364 void
2365 guard_ast(thread_t t)
2366 {
2367 const mach_exception_data_type_t
2368 code = t->guard_exc_info.code,
2369 subcode = t->guard_exc_info.subcode;
2370
2371 t->guard_exc_info.code = 0;
2372 t->guard_exc_info.subcode = 0;
2373
2374 switch (EXC_GUARD_DECODE_GUARD_TYPE(code)) {
2375 case GUARD_TYPE_NONE:
2376 /* lingering AST_GUARD on the processor? */
2377 break;
2378 case GUARD_TYPE_MACH_PORT:
2379 mach_port_guard_ast(t, code, subcode);
2380 break;
2381 case GUARD_TYPE_FD:
2382 fd_guard_ast(t, code, subcode);
2383 break;
2384 #if CONFIG_VNGUARD
2385 case GUARD_TYPE_VN:
2386 vn_guard_ast(t, code, subcode);
2387 break;
2388 #endif
2389 case GUARD_TYPE_VIRT_MEMORY:
2390 virt_memory_guard_ast(t, code, subcode);
2391 break;
2392 default:
2393 panic("guard_exc_info %llx %llx", code, subcode);
2394 }
2395 }
2396
2397 static void
2398 thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1)
2399 {
2400 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
2401 #if CONFIG_TELEMETRY
2402 /*
2403 * This thread is in danger of violating the CPU usage monitor. Enable telemetry
2404 * on the entire task so there are micro-stackshots available if and when
2405 * EXC_RESOURCE is triggered. We could have chosen to enable micro-stackshots
2406 * for this thread only; but now that this task is suspect, knowing what all of
2407 * its threads are up to will be useful.
2408 */
2409 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1);
2410 #endif
2411 return;
2412 }
2413
2414 #if CONFIG_TELEMETRY
2415 /*
2416 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
2417 * exceeded the limit, turn telemetry off for the task.
2418 */
2419 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0);
2420 #endif
2421
2422 if (warning == 0) {
2423 SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU();
2424 }
2425 }
2426
2427 void __attribute__((noinline))
2428 SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void)
2429 {
2430 int pid = 0;
2431 task_t task = current_task();
2432 thread_t thread = current_thread();
2433 uint64_t tid = thread->thread_id;
2434 const char *procname = "unknown";
2435 time_value_t thread_total_time = {0, 0};
2436 time_value_t thread_system_time;
2437 time_value_t thread_user_time;
2438 int action;
2439 uint8_t percentage;
2440 uint32_t usage_percent = 0;
2441 uint32_t interval_sec;
2442 uint64_t interval_ns;
2443 uint64_t balance_ns;
2444 boolean_t fatal = FALSE;
2445 boolean_t send_exc_resource = TRUE; /* in addition to RESOURCE_NOTIFY */
2446 kern_return_t kr;
2447
2448 #ifdef EXC_RESOURCE_MONITORS
2449 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
2450 #endif /* EXC_RESOURCE_MONITORS */
2451 struct ledger_entry_info lei;
2452
2453 assert(thread->t_threadledger != LEDGER_NULL);
2454
2455 /*
2456 * Extract the fatal bit and suspend the monitor (which clears the bit).
2457 */
2458 task_lock(task);
2459 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
2460 fatal = TRUE;
2461 send_exc_resource = TRUE;
2462 }
2463 /* Only one thread can be here at a time. Whichever makes it through
2464 first will successfully suspend the monitor and proceed to send the
2465 notification. Other threads will get an error trying to suspend the
2466 monitor and give up on sending the notification. In the first release,
2467 the monitor won't be resumed for a number of seconds, but we may
2468 eventually need to handle low-latency resume.
2469 */
2470 kr = task_suspend_cpumon(task);
2471 task_unlock(task);
2472 if (kr == KERN_INVALID_ARGUMENT) return;
2473
2474 #ifdef MACH_BSD
2475 pid = proc_selfpid();
2476 if (task->bsd_info != NULL) {
2477 procname = proc_name_address(task->bsd_info);
2478 }
2479 #endif
2480
2481 thread_get_cpulimit(&action, &percentage, &interval_ns);
2482
2483 interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);
2484
2485 thread_read_times(thread, &thread_user_time, &thread_system_time, NULL);
2486 time_value_add(&thread_total_time, &thread_user_time);
2487 time_value_add(&thread_total_time, &thread_system_time);
2488 ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);
2489
2490 /* credit/debit/balance/limit are in absolute time units;
2491 the refill info is in nanoseconds. */
2492 absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
2493 if (lei.lei_last_refill > 0) {
2494 usage_percent = (uint32_t)((balance_ns*100ULL) / lei.lei_last_refill);
2495 }
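/* Editorial example: balance_ns == 500000000 with a 1 s lei_last_refill gives usage_percent == 50. */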
2496
2497 /* TODO: show task total runtime (via TASK_ABSOLUTETIME_INFO)? */
2498 printf("process %s[%d] thread %llu caught burning CPU! "
2499 "It used more than %d%% CPU over %u seconds "
2500 "(actual recent usage: %d%% over ~%llu seconds). "
2501 "Thread lifetime cpu usage %d.%06ds, (%d.%06d user, %d.%06d sys) "
2502 "ledger balance: %lld mabs credit: %lld mabs debit: %lld mabs "
2503 "limit: %llu mabs period: %llu ns last refill: %llu ns%s.\n",
2504 procname, pid, tid,
2505 percentage, interval_sec,
2506 usage_percent,
2507 (lei.lei_last_refill + NSEC_PER_SEC/2) / NSEC_PER_SEC,
2508 thread_total_time.seconds, thread_total_time.microseconds,
2509 thread_user_time.seconds, thread_user_time.microseconds,
2510 thread_system_time.seconds, thread_system_time.microseconds,
2511 lei.lei_balance, lei.lei_credit, lei.lei_debit,
2512 lei.lei_limit, lei.lei_refill_period, lei.lei_last_refill,
2513 (fatal ? " [fatal violation]" : ""));
2514
2515 /*
2516 For now, send RESOURCE_NOTIFY in parallel with EXC_RESOURCE. Once
2517 we have logging parity, we will stop sending EXC_RESOURCE (24508922).
2518 */
2519
2520 /* RESOURCE_NOTIFY MIG specifies nanoseconds of CPU time */
2521 lei.lei_balance = balance_ns;
2522 absolutetime_to_nanoseconds(lei.lei_limit, &lei.lei_limit);
2523 trace_resource_violation(RMON_CPUUSAGE_VIOLATED, &lei);
2524 kr = send_resource_violation(send_cpu_usage_violation, task, &lei,
2525 fatal ? kRNFatalLimitFlag : 0);
2526 if (kr) {
2527 printf("send_resource_violation(CPU usage, ...): error %#x\n", kr);
2528 }
2529
2530 #ifdef EXC_RESOURCE_MONITORS
2531 if (send_exc_resource) {
2532 if (disable_exc_resource) {
2533 printf("process %s[%d] thread %llu caught burning CPU! "
2534 "EXC_RESOURCE%s supressed by a boot-arg\n",
2535 procname, pid, tid, fatal ? " (and termination)" : "");
2536 return;
2537 }
2538
2539 if (audio_active) {
2540 printf("process %s[%d] thread %llu caught burning CPU! "
2541 "EXC_RESOURCE & termination supressed due to audio playback\n",
2542 procname, pid, tid);
2543 return;
2544 }
2545 }
2546
2547
2548 if (send_exc_resource) {
2549 code[0] = code[1] = 0;
2550 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU);
2551 if (fatal) {
2552 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR_FATAL);
2553 } else {
2554 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR);
2555 }
2556 EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec);
2557 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], percentage);
2558 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent);
2559 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
2560 }
2561 #endif /* EXC_RESOURCE_MONITORS */
2562
2563 if (fatal) {
2564 #if CONFIG_JETSAM
2565 jetsam_on_ledger_cpulimit_exceeded();
2566 #else
2567 task_terminate_internal(task);
2568 #endif
2569 }
2570 }
2571
2572 #if DEVELOPMENT || DEBUG
2573 void __attribute__((noinline)) SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(task_t task, int thread_count)
2574 {
2575 mach_exception_data_type_t code[EXCEPTION_CODE_MAX] = {0};
2576 int pid = task_pid(task);
2577 char procname[MAXCOMLEN+1] = "unknown";
2578
2579 if (pid == 1) {
2580 /*
2581 * Cannot suspend launchd
2582 */
2583 return;
2584 }
2585
2586 proc_name(pid, procname, sizeof(procname));
2587
2588 if (disable_exc_resource) {
2589 printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
2590 "supressed by a boot-arg. \n", procname, pid, thread_count);
2591 return;
2592 }
2593
2594 if (audio_active) {
2595 printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
2596 "supressed due to audio playback.\n", procname, pid, thread_count);
2597 return;
2598 }
2599
2600 if (exc_via_corpse_forking == 0) {
2601 printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
2602 "supressed due to corpse forking being disabled.\n", procname, pid,
2603 thread_count);
2604 return;
2605 }
2606
2607 printf("process %s[%d] crossed thread count high watermark (%d), sending "
2608 "EXC_RESOURCE\n", procname, pid, thread_count);
2609
2610 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_THREADS);
2611 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_THREADS_HIGH_WATERMARK);
2612 EXC_RESOURCE_THREADS_ENCODE_THREADS(code[0], thread_count);
2613
2614 task_enqueue_exception_with_corpse(task, EXC_RESOURCE, code, EXCEPTION_CODE_MAX, NULL);
2615 }
2616 #endif /* DEVELOPMENT || DEBUG */
2617
2618 void thread_update_io_stats(thread_t thread, int size, int io_flags)
2619 {
2620 int io_tier;
2621
2622 if (thread->thread_io_stats == NULL || thread->task->task_io_stats == NULL)
2623 return;
2624
2625 if (io_flags & DKIO_READ) {
2626 UPDATE_IO_STATS(thread->thread_io_stats->disk_reads, size);
2627 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->disk_reads, size);
2628 }
2629
2630 if (io_flags & DKIO_META) {
2631 UPDATE_IO_STATS(thread->thread_io_stats->metadata, size);
2632 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->metadata, size);
2633 }
2634
2635 if (io_flags & DKIO_PAGING) {
2636 UPDATE_IO_STATS(thread->thread_io_stats->paging, size);
2637 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->paging, size);
2638 }
2639
2640 io_tier = ((io_flags & DKIO_TIER_MASK) >> DKIO_TIER_SHIFT);
2641 assert (io_tier < IO_NUM_PRIORITIES);
2642
2643 UPDATE_IO_STATS(thread->thread_io_stats->io_priority[io_tier], size);
2644 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->io_priority[io_tier], size);
2645
2646 /* Update Total I/O Counts */
2647 UPDATE_IO_STATS(thread->thread_io_stats->total_io, size);
2648 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->total_io, size);
2649
2650 if (!(io_flags & DKIO_READ)) {
2651 DTRACE_IO3(physical_writes, struct task *, thread->task, uint32_t, size, int, io_flags);
2652 ledger_credit(thread->task->ledger, task_ledgers.physical_writes, size);
2653 }
2654 }
2655
2656 static void
2657 init_thread_ledgers(void) {
2658 ledger_template_t t;
2659 int idx;
2660
2661 assert(thread_ledger_template == NULL);
2662
2663 if ((t = ledger_template_create("Per-thread ledger")) == NULL)
2664 panic("couldn't create thread ledger template");
2665
2666 if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
2667 panic("couldn't create cpu_time entry for thread ledger template");
2668 }
2669
2670 if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) {
2671 panic("couldn't set thread ledger callback for cpu_time entry");
2672 }
2673
2674 thread_ledgers.cpu_time = idx;
2675
2676 ledger_template_complete(t);
2677 thread_ledger_template = t;
2678 }
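
/*
 * Editorial note: per-thread ledgers are not created here; thread_set_cpulimit()
 * below instantiates one from thread_ledger_template the first time a CPU limit
 * is applied to a thread.
 */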
2679
2680 /*
2681 * Returns currently applied CPU usage limit, or 0/0 if none is applied.
2682 */
2683 int
2684 thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns)
2685 {
2686 int64_t abstime = 0;
2687 uint64_t limittime = 0;
2688 thread_t thread = current_thread();
2689
2690 *percentage = 0;
2691 *interval_ns = 0;
2692 *action = 0;
2693
2694 if (thread->t_threadledger == LEDGER_NULL) {
2695 /*
2696 * This thread has no per-thread ledger, so it can't possibly
2697 * have a CPU limit applied.
2698 */
2699 return (KERN_SUCCESS);
2700 }
2701
2702 ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns);
2703 ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime);
2704
2705 if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) {
2706 /*
2707 * This thread's CPU time ledger has no period or limit; so it
2708 * doesn't have a CPU limit applied.
2709 */
2710 return (KERN_SUCCESS);
2711 }
2712
2713 /*
2714 * This calculation is the converse to the one in thread_set_cpulimit().
2715 */
2716 absolutetime_to_nanoseconds(abstime, &limittime);
2717 *percentage = (limittime * 100ULL) / *interval_ns;
2718 assert(*percentage <= 100);
2719
2720 if (thread->options & TH_OPT_PROC_CPULIMIT) {
2721 assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0);
2722
2723 *action = THREAD_CPULIMIT_BLOCK;
2724 } else if (thread->options & TH_OPT_PRVT_CPULIMIT) {
2725 assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0);
2726
2727 *action = THREAD_CPULIMIT_EXCEPTION;
2728 } else {
2729 *action = THREAD_CPULIMIT_DISABLE;
2730 }
2731
2732 return (KERN_SUCCESS);
2733 }
2734
2735 /*
2736 * Set CPU usage limit on a thread.
2737 *
2738 * Calling with percentage of 0 will unset the limit for this thread.
2739 */
2740 int
2741 thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
2742 {
2743 thread_t thread = current_thread();
2744 ledger_t l;
2745 uint64_t limittime = 0;
2746 uint64_t abstime = 0;
2747
2748 assert(percentage <= 100);
2749
2750 if (action == THREAD_CPULIMIT_DISABLE) {
2751 /*
2752 * Remove CPU limit, if any exists.
2753 */
2754 if (thread->t_threadledger != LEDGER_NULL) {
2755 l = thread->t_threadledger;
2756 ledger_set_limit(l, thread_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
2757 ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_IGNORE);
2758 thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT);
2759 }
2760
2761 return (0);
2762 }
2763
2764 if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) {
2765 return (KERN_INVALID_ARGUMENT);
2766 }
2767
2768 l = thread->t_threadledger;
2769 if (l == LEDGER_NULL) {
2770 /*
2771 * This thread doesn't yet have a per-thread ledger; so create one with the CPU time entry active.
2772 */
2773 if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL)
2774 return (KERN_RESOURCE_SHORTAGE);
2775
2776 /*
2777 * We are the first to create this thread's ledger, so only activate our entry.
2778 */
2779 ledger_entry_setactive(l, thread_ledgers.cpu_time);
2780 thread->t_threadledger = l;
2781 }
2782
2783 /*
2784 * The limit is specified as a percentage of CPU over an interval in nanoseconds.
2785 * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
2786 */
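/* Editorial example: percentage == 25 over interval_ns == 1 s gives limittime == 250 ms of CPU per refill period. */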
2787 limittime = (interval_ns * percentage) / 100;
2788 nanoseconds_to_absolutetime(limittime, &abstime);
2789 ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct);
2790 /*
2791 * Refill the thread's allotted CPU time every interval_ns nanoseconds.
2792 */
2793 ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);
2794
2795 if (action == THREAD_CPULIMIT_EXCEPTION) {
2796 /*
2797 * We don't support programming the CPU usage monitor on a task if any of its
2798 * threads have a per-thread blocking CPU limit configured.
2799 */
2800 if (thread->options & TH_OPT_PRVT_CPULIMIT) {
2801 panic("CPU usage monitor activated, but blocking thread limit exists");
2802 }
2803
2804 /*
2805 * Make a note that this thread's CPU limit is being used for the task-wide CPU
2806 * usage monitor. We don't have to arm the callback which will trigger the
2807 * exception, because that was done for us in ledger_instantiate (because the
2808 * ledger template used has a default callback).
2809 */
2810 thread->options |= TH_OPT_PROC_CPULIMIT;
2811 } else {
2812 /*
2813 * We deliberately override any CPU limit imposed by a task-wide limit (e.g.
2814 * CPU usage monitor).
2815 */
2816 thread->options &= ~TH_OPT_PROC_CPULIMIT;
2817
2818 thread->options |= TH_OPT_PRVT_CPULIMIT;
2819 /* The per-thread ledger template by default has a callback for CPU time */
2820 ledger_disable_callback(l, thread_ledgers.cpu_time);
2821 ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
2822 }
2823
2824 return (0);
2825 }
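
/*
 * Editorial note -- illustrative call (sketch; the parameter values are made up):
 *
 *	thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, 50, 10 * NSEC_PER_SEC);
 *
 * throttles the calling thread to roughly 50% of one CPU over a 10-second
 * refill period by letting the ledger block the thread when the limit is hit.
 */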
2826
2827 void
2828 thread_sched_call(
2829 thread_t thread,
2830 sched_call_t call)
2831 {
2832 assert((thread->state & TH_WAIT_REPORT) == 0);
2833 thread->sched_call = call;
2834 }
2835
2836 uint64_t
2837 thread_tid(
2838 thread_t thread)
2839 {
2840 return (thread != THREAD_NULL? thread->thread_id: 0);
2841 }
2842
2843 uint16_t
2844 thread_set_tag(thread_t th, uint16_t tag)
2845 {
2846 return thread_set_tag_internal(th, tag);
2847 }
2848
2849 uint16_t
2850 thread_get_tag(thread_t th)
2851 {
2852 return thread_get_tag_internal(th);
2853 }
2854
2855 uint64_t
2856 thread_last_run_time(thread_t th)
2857 {
2858 return th->last_run_time;
2859 }
2860
2861 uint64_t
2862 thread_dispatchqaddr(
2863 thread_t thread)
2864 {
2865 uint64_t dispatchqueue_addr;
2866 uint64_t thread_handle;
2867
2868 if (thread == THREAD_NULL)
2869 return 0;
2870
2871 thread_handle = thread->machine.cthread_self;
2872 if (thread_handle == 0)
2873 return 0;
2874
2875 if (thread->inspection == TRUE)
2876 dispatchqueue_addr = thread_handle + get_task_dispatchqueue_offset(thread->task);
2877 else if (thread->task->bsd_info)
2878 dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
2879 else
2880 dispatchqueue_addr = 0;
2881
2882 return dispatchqueue_addr;
2883 }
2884
2885 uint64_t
2886 thread_rettokern_addr(
2887 thread_t thread)
2888 {
2889 uint64_t rettokern_addr;
2890 uint64_t rettokern_offset;
2891 uint64_t thread_handle;
2892
2893 if (thread == THREAD_NULL)
2894 return 0;
2895
2896 thread_handle = thread->machine.cthread_self;
2897 if (thread_handle == 0)
2898 return 0;
2899
2900 if (thread->task->bsd_info) {
2901 rettokern_offset = get_return_to_kernel_offset_from_proc(thread->task->bsd_info);
2902
2903 /* Return 0 if return to kernel offset is not initialized. */
2904 if (rettokern_offset == 0) {
2905 rettokern_addr = 0;
2906 } else {
2907 rettokern_addr = thread_handle + rettokern_offset;
2908 }
2909 } else {
2910 rettokern_addr = 0;
2911 }
2912
2913 return rettokern_addr;
2914 }
2915
2916 /*
2917 * Export routines to other components for things that are done as macros
2918 * within the osfmk component.
2919 */
2920
2921 #undef thread_mtx_lock
2922 void thread_mtx_lock(thread_t thread);
2923 void
2924 thread_mtx_lock(thread_t thread)
2925 {
2926 lck_mtx_lock(&thread->mutex);
2927 }
2928
2929 #undef thread_mtx_unlock
2930 void thread_mtx_unlock(thread_t thread);
2931 void
2932 thread_mtx_unlock(thread_t thread)
2933 {
2934 lck_mtx_unlock(&thread->mutex);
2935 }
2936
2937 #undef thread_reference
2938 void thread_reference(thread_t thread);
2939 void
2940 thread_reference(
2941 thread_t thread)
2942 {
2943 if (thread != THREAD_NULL)
2944 thread_reference_internal(thread);
2945 }
2946
2947 #undef thread_should_halt
2948
2949 boolean_t
2950 thread_should_halt(
2951 thread_t th)
2952 {
2953 return (thread_should_halt_fast(th));
2954 }
2955
2956 /*
2957 * thread_set_voucher_name - reset the voucher port name bound to this thread
2958 *
2959 * Conditions: nothing locked
2960 *
2961 * If we already converted the previous name to a cached voucher
2962 * reference, then we discard that reference here. The next lookup
2963 * will cache it again.
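* (Editorial note: passing MACH_PORT_NULL leaves new_voucher == IPC_VOUCHER_NULL
* below, so it simply unbinds and releases any previously bound voucher.)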
2964 */
2965
2966 kern_return_t
2967 thread_set_voucher_name(mach_port_name_t voucher_name)
2968 {
2969 thread_t thread = current_thread();
2970 ipc_voucher_t new_voucher = IPC_VOUCHER_NULL;
2971 ipc_voucher_t voucher;
2972 ledger_t bankledger = NULL;
2973 struct thread_group *banktg = NULL;
2974
2975 if (MACH_PORT_DEAD == voucher_name)
2976 return KERN_INVALID_RIGHT;
2977
2978 /*
2979 * aggressively convert to voucher reference
2980 */
2981 if (MACH_PORT_VALID(voucher_name)) {
2982 new_voucher = convert_port_name_to_voucher(voucher_name);
2983 if (IPC_VOUCHER_NULL == new_voucher)
2984 return KERN_INVALID_ARGUMENT;
2985 }
2986 bank_get_bank_ledger_and_thread_group(new_voucher, &bankledger, &banktg);
2987
2988 thread_mtx_lock(thread);
2989 voucher = thread->ith_voucher;
2990 thread->ith_voucher_name = voucher_name;
2991 thread->ith_voucher = new_voucher;
2992 thread_mtx_unlock(thread);
2993
2994 bank_swap_thread_bank_ledger(thread, bankledger);
2995
2996 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2997 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
2998 (uintptr_t)thread_tid(thread),
2999 (uintptr_t)voucher_name,
3000 VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
3001 1, 0);
3002
3003 if (IPC_VOUCHER_NULL != voucher)
3004 ipc_voucher_release(voucher);
3005
3006 return KERN_SUCCESS;
3007 }
3008
3009 /*
3010 * thread_get_mach_voucher - return a voucher reference for the specified thread voucher
3011 *
3012 * Conditions: nothing locked
3013 *
3014 * A reference to the voucher may be lazily pending, if someone set the voucher name
3015 * but nobody has done a lookup yet. In that case, we'll have to do the equivalent
3016 * lookup here.
3017 *
3018 * NOTE: At the moment, there is no distinction between the current and effective
3019 * vouchers because we only set them at the thread level currently.
3020 */
3021 kern_return_t
3022 thread_get_mach_voucher(
3023 thread_act_t thread,
3024 mach_voucher_selector_t __unused which,
3025 ipc_voucher_t *voucherp)
3026 {
3027 ipc_voucher_t voucher;
3028 mach_port_name_t voucher_name;
3029
3030 if (THREAD_NULL == thread)
3031 return KERN_INVALID_ARGUMENT;
3032
3033 thread_mtx_lock(thread);
3034 voucher = thread->ith_voucher;
3035
3036 /* if already cached, just return a ref */
3037 if (IPC_VOUCHER_NULL != voucher) {
3038 ipc_voucher_reference(voucher);
3039 thread_mtx_unlock(thread);
3040 *voucherp = voucher;
3041 return KERN_SUCCESS;
3042 }
3043
3044 voucher_name = thread->ith_voucher_name;
3045
3046 /* convert the name to a port, then voucher reference */
3047 if (MACH_PORT_VALID(voucher_name)) {
3048 ipc_port_t port;
3049
3050 if (KERN_SUCCESS !=
3051 ipc_object_copyin(thread->task->itk_space, voucher_name,
3052 MACH_MSG_TYPE_COPY_SEND, (ipc_object_t *)&port)) {
3053 thread->ith_voucher_name = MACH_PORT_NULL;
3054 thread_mtx_unlock(thread);
3055 *voucherp = IPC_VOUCHER_NULL;
3056 return KERN_SUCCESS;
3057 }
3058
3059 /* convert to a voucher ref to return, and cache a ref on thread */
3060 voucher = convert_port_to_voucher(port);
3061 ipc_voucher_reference(voucher);
3062 thread->ith_voucher = voucher;
3063 thread_mtx_unlock(thread);
3064
3065 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3066 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
3067 (uintptr_t)thread_tid(thread),
3068 (uintptr_t)port,
3069 VM_KERNEL_ADDRPERM((uintptr_t)voucher),
3070 2, 0);
3071
3072
3073 ipc_port_release_send(port);
3074 } else
3075 thread_mtx_unlock(thread);
3076
3077 *voucherp = voucher;
3078 return KERN_SUCCESS;
3079 }
3080
3081 /*
3082 * thread_set_mach_voucher - set a voucher reference for the specified thread voucher
3083 *
3084 * Conditions: caller holds a reference on the voucher.
3085 * nothing locked.
3086 *
3087 * We grab another reference to the voucher and bind it to the thread. Any lazy
3088 * binding is erased. The old voucher reference associated with the thread is
3089 * discarded.
3090 */
3091 kern_return_t
3092 thread_set_mach_voucher(
3093 thread_t thread,
3094 ipc_voucher_t voucher)
3095 {
3096 ipc_voucher_t old_voucher;
3097 ledger_t bankledger = NULL;
3098 struct thread_group *banktg = NULL;
3099
3100 if (THREAD_NULL == thread)
3101 return KERN_INVALID_ARGUMENT;
3102
3103 if (thread != current_thread() && thread->started)
3104 return KERN_INVALID_ARGUMENT;
3105
3106 ipc_voucher_reference(voucher);
3107 bank_get_bank_ledger_and_thread_group(voucher, &bankledger, &banktg);
3108
3109 thread_mtx_lock(thread);
3110 old_voucher = thread->ith_voucher;
3111 thread->ith_voucher = voucher;
3112 thread->ith_voucher_name = MACH_PORT_NULL;
3113 thread_mtx_unlock(thread);
3114
3115 bank_swap_thread_bank_ledger(thread, bankledger);
3116
3117 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3118 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
3119 (uintptr_t)thread_tid(thread),
3120 (uintptr_t)MACH_PORT_NULL,
3121 VM_KERNEL_ADDRPERM((uintptr_t)voucher),
3122 3, 0);
3123
3124 ipc_voucher_release(old_voucher);
3125
3126 return KERN_SUCCESS;
3127 }
3128
3129 /*
3130 * thread_swap_mach_voucher - swap a voucher reference for the specified thread voucher
3131 *
3132 * Conditions: caller holds a reference on the new and presumed old voucher(s).
3133 * nothing locked.
3134 *
3135 * This function is no longer supported.
3136 */
3137 kern_return_t
3138 thread_swap_mach_voucher(
3139 __unused thread_t thread,
3140 __unused ipc_voucher_t new_voucher,
3141 ipc_voucher_t *in_out_old_voucher)
3142 {
3143 /*
3144 * Currently this function is only called from a MIG generated
3145 * routine which doesn't release the reference on the voucher
3146 * addressed by in_out_old_voucher. To avoid leaking this reference,
3147 * a call to release it has been added here.
3148 */
3149 ipc_voucher_release(*in_out_old_voucher);
3150 return KERN_NOT_SUPPORTED;
3151 }
3152
3153 /*
3154 * thread_get_current_voucher_origin_pid - get the pid of the originator of the current voucher.
3155 */
3156 kern_return_t
3157 thread_get_current_voucher_origin_pid(
3158 int32_t *pid)
3159 {
3160 uint32_t buf_size;
3161 kern_return_t kr;
3162 thread_t thread = current_thread();
3163
3164 buf_size = sizeof(*pid);
3165 kr = mach_voucher_attr_command(thread->ith_voucher,
3166 MACH_VOUCHER_ATTR_KEY_BANK,
3167 BANK_ORIGINATOR_PID,
3168 NULL,
3169 0,
3170 (mach_voucher_attr_content_t)pid,
3171 &buf_size);
3172
3173 return kr;
3174 }
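
/*
 * Editorial note -- illustrative caller (sketch):
 *
 *	int32_t pid;
 *	if (thread_get_current_voucher_origin_pid(&pid) == KERN_SUCCESS)
 *		... pid identifies the originator recorded in the current voucher's bank attribute ...
 */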
3175
3176
3177 boolean_t
3178 thread_has_thread_name(thread_t th)
3179 {
3180 if ((th) && (th->uthread)) {
3181 return bsd_hasthreadname(th->uthread);
3182 }
3183
3184 /*
3185 * This is an odd case; clients may set the thread name based on the lack of
3186 * a name, but in this context there is no uthread to attach the name to.
3187 */
3188 return FALSE;
3189 }
3190
3191 void
3192 thread_set_thread_name(thread_t th, const char* name)
3193 {
3194 if ((th) && (th->uthread) && name) {
3195 bsd_setthreadname(th->uthread, name);
3196 }
3197 }
3198
3199 void
3200 thread_set_honor_qlimit(thread_t thread)
3201 {
3202 thread->options |= TH_OPT_HONOR_QLIMIT;
3203 }
3204
3205 void
3206 thread_clear_honor_qlimit(thread_t thread)
3207 {
3208 thread->options &= (~TH_OPT_HONOR_QLIMIT);
3209 }
3210
3211 /*
3212 * thread_enable_send_importance - set/clear the SEND_IMPORTANCE thread option bit.
3213 */
3214 void thread_enable_send_importance(thread_t thread, boolean_t enable)
3215 {
3216 if (enable == TRUE)
3217 thread->options |= TH_OPT_SEND_IMPORTANCE;
3218 else
3219 thread->options &= ~TH_OPT_SEND_IMPORTANCE;
3220 }
3221
3222 /*
3223 * thread_set_allocation_name - record an allocation-site name on the current thread's kernel state, returning the previously recorded name.
3224 */
3225
3226 kern_allocation_name_t thread_set_allocation_name(kern_allocation_name_t new_name)
3227 {
3228 kern_allocation_name_t ret;
3229 thread_kernel_state_t kstate = thread_get_kernel_state(current_thread());
3230 ret = kstate->allocation_name;
3231 // first-in wins: only install new_name when clearing (NULL) or when no name is currently recorded
3232 if (!new_name || !kstate->allocation_name) kstate->allocation_name = new_name;
3233 return ret;
3234 }
3235
3236 uint64_t
3237 thread_get_last_wait_duration(thread_t thread)
3238 {
3239 return thread->last_made_runnable_time - thread->last_run_time;
3240 }
3241
3242 #if CONFIG_DTRACE
3243 uint32_t dtrace_get_thread_predcache(thread_t thread)
3244 {
3245 if (thread != THREAD_NULL)
3246 return thread->t_dtrace_predcache;
3247 else
3248 return 0;
3249 }
3250
3251 int64_t dtrace_get_thread_vtime(thread_t thread)
3252 {
3253 if (thread != THREAD_NULL)
3254 return thread->t_dtrace_vtime;
3255 else
3256 return 0;
3257 }
3258
3259 int dtrace_get_thread_last_cpu_id(thread_t thread)
3260 {
3261 if ((thread != THREAD_NULL) && (thread->last_processor != PROCESSOR_NULL)) {
3262 return thread->last_processor->cpu_id;
3263 } else {
3264 return -1;
3265 }
3266 }
3267
3268 int64_t dtrace_get_thread_tracing(thread_t thread)
3269 {
3270 if (thread != THREAD_NULL)
3271 return thread->t_dtrace_tracing;
3272 else
3273 return 0;
3274 }
3275
3276 boolean_t dtrace_get_thread_reentering(thread_t thread)
3277 {
3278 if (thread != THREAD_NULL)
3279 return (thread->options & TH_OPT_DTRACE) ? TRUE : FALSE;
3280 else
3281 return 0;
3282 }
3283
3284 vm_offset_t dtrace_get_kernel_stack(thread_t thread)
3285 {
3286 if (thread != THREAD_NULL)
3287 return thread->kernel_stack;
3288 else
3289 return 0;
3290 }
3291
3292 #if KASAN
3293 struct kasan_thread_data *
3294 kasan_get_thread_data(thread_t thread)
3295 {
3296 return &thread->kasan_data;
3297 }
3298 #endif
3299
3300 int64_t dtrace_calc_thread_recent_vtime(thread_t thread)
3301 {
3302 if (thread != THREAD_NULL) {
3303 processor_t processor = current_processor();
3304 uint64_t abstime = mach_absolute_time();
3305 timer_t timer;
3306
3307 timer = PROCESSOR_DATA(processor, thread_timer);
3308
3309 return timer_grab(&(thread->system_timer)) + timer_grab(&(thread->user_timer)) +
3310 (abstime - timer->tstamp); /* XXX need interrupts off to prevent missed time? */
3311 } else
3312 return 0;
3313 }
3314
3315 void dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
3316 {
3317 if (thread != THREAD_NULL)
3318 thread->t_dtrace_predcache = predcache;
3319 }
3320
3321 void dtrace_set_thread_vtime(thread_t thread, int64_t vtime)
3322 {
3323 if (thread != THREAD_NULL)
3324 thread->t_dtrace_vtime = vtime;
3325 }
3326
3327 void dtrace_set_thread_tracing(thread_t thread, int64_t accum)
3328 {
3329 if (thread != THREAD_NULL)
3330 thread->t_dtrace_tracing = accum;
3331 }
3332
3333 void dtrace_set_thread_reentering(thread_t thread, boolean_t vbool)
3334 {
3335 if (thread != THREAD_NULL) {
3336 if (vbool)
3337 thread->options |= TH_OPT_DTRACE;
3338 else
3339 thread->options &= (~TH_OPT_DTRACE);
3340 }
3341 }
3342
3343 vm_offset_t dtrace_set_thread_recover(thread_t thread, vm_offset_t recover)
3344 {
3345 vm_offset_t prev = 0;
3346
3347 if (thread != THREAD_NULL) {
3348 prev = thread->recover;
3349 thread->recover = recover;
3350 }
3351 return prev;
3352 }
3353
3354 void dtrace_thread_bootstrap(void)
3355 {
3356 task_t task = current_task();
3357
3358 if (task->thread_count == 1) {
3359 thread_t thread = current_thread();
3360 if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) {
3361 thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS;
3362 DTRACE_PROC(exec__success);
3363 KDBG(BSDDBG_CODE(DBG_BSD_PROC,BSD_PROC_EXEC),
3364 task_pid(task));
3365 }
3366 DTRACE_PROC(start);
3367 }
3368 DTRACE_PROC(lwp__start);
3369
3370 }
3371
3372 void
3373 dtrace_thread_didexec(thread_t thread)
3374 {
3375 thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS;
3376 }
3377 #endif /* CONFIG_DTRACE */