1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: kern/thread.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub
61 * Date: 1986
62 *
63 * Thread management primitives implementation.
64 */
65 /*
66 * Copyright (c) 1993 The University of Utah and
67 * the Computer Systems Laboratory (CSL). All rights reserved.
68 *
69 * Permission to use, copy, modify and distribute this software and its
70 * documentation is hereby granted, provided that both the copyright
71 * notice and this permission notice appear in all copies of the
72 * software, derivative works or modified versions, and any portions
73 * thereof, and that both notices appear in supporting documentation.
74 *
75 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
76 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
77 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
78 *
79 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
80 * improvements that they make and grant CSL redistribution rights.
81 *
82 */
83
84 #include <mach/mach_types.h>
85 #include <mach/boolean.h>
86 #include <mach/policy.h>
87 #include <mach/thread_info.h>
88 #include <mach/thread_special_ports.h>
89 #include <mach/thread_status.h>
90 #include <mach/time_value.h>
91 #include <mach/vm_param.h>
92
93 #include <machine/thread.h>
94 #include <machine/pal_routines.h>
95 #include <machine/limits.h>
96
97 #include <kern/kern_types.h>
98 #include <kern/kalloc.h>
99 #include <kern/cpu_data.h>
100 #include <kern/counters.h>
101 #include <kern/extmod_statistics.h>
102 #include <kern/ipc_mig.h>
103 #include <kern/ipc_tt.h>
104 #include <kern/mach_param.h>
105 #include <kern/machine.h>
106 #include <kern/misc_protos.h>
107 #include <kern/processor.h>
108 #include <kern/queue.h>
109 #include <kern/sched.h>
110 #include <kern/sched_prim.h>
111 #include <kern/sync_lock.h>
112 #include <kern/syscall_subr.h>
113 #include <kern/task.h>
114 #include <kern/thread.h>
115 #include <kern/thread_group.h>
116 #include <kern/coalition.h>
117 #include <kern/host.h>
118 #include <kern/zalloc.h>
119 #include <kern/assert.h>
120 #include <kern/exc_resource.h>
121 #include <kern/exc_guard.h>
122 #include <kern/telemetry.h>
123 #include <kern/policy_internal.h>
124
125 #include <corpses/task_corpse.h>
126 #if KPC
127 #include <kern/kpc.h>
128 #endif
129
130 #if MONOTONIC
131 #include <kern/monotonic.h>
132 #include <machine/monotonic.h>
133 #endif /* MONOTONIC */
134
135 #include <ipc/ipc_kmsg.h>
136 #include <ipc/ipc_port.h>
137 #include <bank/bank_types.h>
138
139 #include <vm/vm_kern.h>
140 #include <vm/vm_pageout.h>
141
142 #include <sys/kdebug.h>
143 #include <sys/bsdtask_info.h>
144 #include <mach/sdt.h>
145 #include <san/kasan.h>
146
147 #include <stdatomic.h>
148
149 /*
150 * Exported interfaces
151 */
152 #include <mach/task_server.h>
153 #include <mach/thread_act_server.h>
154 #include <mach/mach_host_server.h>
155 #include <mach/host_priv_server.h>
156 #include <mach/mach_voucher_server.h>
157 #include <kern/policy_internal.h>
158
159 static struct zone *thread_zone;
160 static lck_grp_attr_t thread_lck_grp_attr;
161 lck_attr_t thread_lck_attr;
162 lck_grp_t thread_lck_grp;
163
164 struct zone *thread_qos_override_zone;
165
166 decl_simple_lock_data(static,thread_stack_lock)
167 static queue_head_t thread_stack_queue;
168
169 decl_simple_lock_data(static,thread_terminate_lock)
170 static queue_head_t thread_terminate_queue;
171
172 static queue_head_t crashed_threads_queue;
173
174 decl_simple_lock_data(static,thread_exception_lock)
175 static queue_head_t thread_exception_queue;
176
177 struct thread_exception_elt {
178 queue_chain_t elt;
179 exception_type_t exception_type;
180 task_t exception_task;
181 thread_t exception_thread;
182 };
183
184 static struct thread thread_template, init_thread;
185
186 static void sched_call_null(
187 int type,
188 thread_t thread);
189
190 #ifdef MACH_BSD
191 extern void proc_exit(void *);
192 extern mach_exception_data_type_t proc_encode_exit_exception_code(void *);
193 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
194 extern uint64_t get_return_to_kernel_offset_from_proc(void *p);
195 extern int proc_selfpid(void);
196 extern char * proc_name_address(void *p);
197 #endif /* MACH_BSD */
198
199 extern int disable_exc_resource;
200 extern int audio_active;
201 extern int debug_task;
202 int thread_max = CONFIG_THREAD_MAX; /* Max number of threads */
203 int task_threadmax = CONFIG_THREAD_MAX;
204
205 static uint64_t thread_unique_id = 100;
206
207 struct _thread_ledger_indices thread_ledgers = { -1 };
208 static ledger_template_t thread_ledger_template = NULL;
209 static void init_thread_ledgers(void);
210
211 #if CONFIG_JETSAM
212 void jetsam_on_ledger_cpulimit_exceeded(void);
213 #endif
214
215 /*
216 * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry.
217 *
218  * (i.e., when any thread's CPU consumption exceeds 70% of the limit, start taking user
219  * stacktraces, aka micro-stackshots)
220 */
221 #define CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70
222
223 int cpumon_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
224 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void);
225
226 /*
227 * The smallest interval over which we support limiting CPU consumption is 1ms
228 */
229 #define MINIMUM_CPULIMIT_INTERVAL_MS 1
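#if 0	/* Illustrative sketch only; not part of the original source. */
/*
 * How the trigger percentage relates to a per-thread CPU limit: with a
 * ledger budget of limit_ns of CPU time per interval, micro-stackshot
 * telemetry would begin once a thread has consumed
 * (limit_ns * cpumon_ustackshots_trigger_pct) / 100 of that budget.
 * The helper name and its parameters are hypothetical.
 */
static uint64_t
example_cpumon_trigger_threshold(uint64_t limit_ns, int trigger_pct)
{
	/* e.g. limit_ns = 500ms over a 1s interval, trigger_pct = 70 -> 350ms */
	return (limit_ns * (uint64_t)trigger_pct) / 100;
}
#endif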
230
231 void
232 thread_bootstrap(void)
233 {
234 /*
235 * Fill in a template thread for fast initialization.
236 */
237
238 #if MACH_ASSERT
239 thread_template.thread_magic = THREAD_MAGIC;
240 #endif /* MACH_ASSERT */
241
242 thread_template.runq = PROCESSOR_NULL;
243
244 thread_template.ref_count = 2;
245
246 thread_template.reason = AST_NONE;
247 thread_template.at_safe_point = FALSE;
248 thread_template.wait_event = NO_EVENT64;
249 thread_template.waitq = NULL;
250 thread_template.wait_result = THREAD_WAITING;
251 thread_template.options = THREAD_ABORTSAFE;
252 thread_template.state = TH_WAIT | TH_UNINT;
253 thread_template.wake_active = FALSE;
254 thread_template.continuation = THREAD_CONTINUE_NULL;
255 thread_template.parameter = NULL;
256
257 thread_template.importance = 0;
258 thread_template.sched_mode = TH_MODE_NONE;
259 thread_template.sched_flags = 0;
260 thread_template.saved_mode = TH_MODE_NONE;
261 thread_template.safe_release = 0;
262 thread_template.th_sched_bucket = TH_BUCKET_RUN;
263
264 thread_template.sfi_class = SFI_CLASS_UNSPECIFIED;
265 thread_template.sfi_wait_class = SFI_CLASS_UNSPECIFIED;
266
267 thread_template.active = 0;
268 thread_template.started = 0;
269 thread_template.static_param = 0;
270 thread_template.policy_reset = 0;
271
272 thread_template.base_pri = BASEPRI_DEFAULT;
273 thread_template.sched_pri = 0;
274 thread_template.max_priority = 0;
275 thread_template.task_priority = 0;
276 thread_template.promotions = 0;
277 thread_template.pending_promoter_index = 0;
278 thread_template.pending_promoter[0] = NULL;
279 thread_template.pending_promoter[1] = NULL;
280 thread_template.rwlock_count = 0;
281
282
283 thread_template.realtime.deadline = UINT64_MAX;
284
285 thread_template.quantum_remaining = 0;
286 thread_template.last_run_time = 0;
287 thread_template.last_made_runnable_time = THREAD_NOT_RUNNABLE;
288 thread_template.last_basepri_change_time = THREAD_NOT_RUNNABLE;
289 thread_template.same_pri_latency = 0;
290
291 thread_template.computation_metered = 0;
292 thread_template.computation_epoch = 0;
293
294 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
295 thread_template.sched_stamp = 0;
296 thread_template.pri_shift = INT8_MAX;
297 thread_template.sched_usage = 0;
298 thread_template.cpu_usage = thread_template.cpu_delta = 0;
299 #endif
300 thread_template.c_switch = thread_template.p_switch = thread_template.ps_switch = 0;
301
302 #if MONOTONIC
303 memset(&thread_template.t_monotonic, 0,
304 sizeof(thread_template.t_monotonic));
305 #endif /* MONOTONIC */
306
307 thread_template.bound_processor = PROCESSOR_NULL;
308 thread_template.last_processor = PROCESSOR_NULL;
309
310 thread_template.sched_call = sched_call_null;
311
312 timer_init(&thread_template.user_timer);
313 timer_init(&thread_template.system_timer);
314 timer_init(&thread_template.ptime);
315 thread_template.user_timer_save = 0;
316 thread_template.system_timer_save = 0;
317 thread_template.vtimer_user_save = 0;
318 thread_template.vtimer_prof_save = 0;
319 thread_template.vtimer_rlim_save = 0;
320 thread_template.vtimer_qos_save = 0;
321
322 #if CONFIG_SCHED_SFI
323 thread_template.wait_sfi_begin_time = 0;
324 #endif
325
326 thread_template.wait_timer_is_set = FALSE;
327 thread_template.wait_timer_active = 0;
328
329 thread_template.depress_timer_active = 0;
330
331 thread_template.recover = (vm_offset_t)NULL;
332
333 thread_template.map = VM_MAP_NULL;
334
335 #if CONFIG_DTRACE
336 thread_template.t_dtrace_predcache = 0;
337 thread_template.t_dtrace_vtime = 0;
338 thread_template.t_dtrace_tracing = 0;
339 #endif /* CONFIG_DTRACE */
340
341 #if KPERF
342 thread_template.kperf_flags = 0;
343 thread_template.kperf_pet_gen = 0;
344 thread_template.kperf_c_switch = 0;
345 thread_template.kperf_pet_cnt = 0;
346 #endif
347
348 #if KPC
349 thread_template.kpc_buf = NULL;
350 #endif
351
352 #if HYPERVISOR
353 thread_template.hv_thread_target = NULL;
354 #endif /* HYPERVISOR */
355
356 #if (DEVELOPMENT || DEBUG)
357 thread_template.t_page_creation_throttled_hard = 0;
358 thread_template.t_page_creation_throttled_soft = 0;
359 #endif /* DEVELOPMENT || DEBUG */
360 thread_template.t_page_creation_throttled = 0;
361 thread_template.t_page_creation_count = 0;
362 thread_template.t_page_creation_time = 0;
363
364 thread_template.affinity_set = NULL;
365
366 thread_template.syscalls_unix = 0;
367 thread_template.syscalls_mach = 0;
368
369 thread_template.t_ledger = LEDGER_NULL;
370 thread_template.t_threadledger = LEDGER_NULL;
371 thread_template.t_bankledger = LEDGER_NULL;
372 thread_template.t_deduct_bank_ledger_time = 0;
373
374 thread_template.requested_policy = (struct thread_requested_policy) {};
375 thread_template.effective_policy = (struct thread_effective_policy) {};
376
377 bzero(&thread_template.overrides, sizeof(thread_template.overrides));
378 thread_template.sync_ipc_overrides = 0;
379
380 thread_template.iotier_override = THROTTLE_LEVEL_NONE;
381 thread_template.thread_io_stats = NULL;
382 #if CONFIG_EMBEDDED
383 thread_template.taskwatch = NULL;
384 #endif /* CONFIG_EMBEDDED */
385 thread_template.thread_callout_interrupt_wakeups = thread_template.thread_callout_platform_idle_wakeups = 0;
386
387 thread_template.thread_timer_wakeups_bin_1 = thread_template.thread_timer_wakeups_bin_2 = 0;
388 thread_template.callout_woken_from_icontext = thread_template.callout_woken_from_platform_idle = 0;
389
390 thread_template.thread_tag = 0;
391
392 thread_template.ith_voucher_name = MACH_PORT_NULL;
393 thread_template.ith_voucher = IPC_VOUCHER_NULL;
394
395 thread_template.th_work_interval = NULL;
396
397 init_thread = thread_template;
398 machine_set_current_thread(&init_thread);
399 }
400
401 extern boolean_t allow_qos_policy_set;
402
403 void
404 thread_init(void)
405 {
406 thread_zone = zinit(
407 sizeof(struct thread),
408 thread_max * sizeof(struct thread),
409 THREAD_CHUNK * sizeof(struct thread),
410 "threads");
411
412 thread_qos_override_zone = zinit(
413 sizeof(struct thread_qos_override),
414 4 * thread_max * sizeof(struct thread_qos_override),
415 PAGE_SIZE,
416 "thread qos override");
417 zone_change(thread_qos_override_zone, Z_EXPAND, TRUE);
418 zone_change(thread_qos_override_zone, Z_COLLECT, TRUE);
419 zone_change(thread_qos_override_zone, Z_CALLERACCT, FALSE);
420 zone_change(thread_qos_override_zone, Z_NOENCRYPT, TRUE);
421
422 lck_grp_attr_setdefault(&thread_lck_grp_attr);
423 lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr);
424 lck_attr_setdefault(&thread_lck_attr);
425
426 stack_init();
427
428 thread_policy_init();
429
430 /*
431 * Initialize any machine-dependent
432 * per-thread structures necessary.
433 */
434 machine_thread_init();
435
436 if (!PE_parse_boot_argn("cpumon_ustackshots_trigger_pct", &cpumon_ustackshots_trigger_pct,
437 sizeof (cpumon_ustackshots_trigger_pct))) {
438 cpumon_ustackshots_trigger_pct = CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT;
439 }
440
441 PE_parse_boot_argn("-qos-policy-allow", &allow_qos_policy_set, sizeof(allow_qos_policy_set));
442
443 init_thread_ledgers();
444 }
445
446 boolean_t
447 thread_is_active(thread_t thread)
448 {
449 return (thread->active);
450 }
451
452 void
453 thread_corpse_continue(void)
454 {
455 thread_t thread = current_thread();
456
457 thread_terminate_internal(thread);
458
459 /*
460 * Handle the thread termination directly
461 * here instead of returning to userspace.
462 */
463 assert(thread->active == FALSE);
464 thread_ast_clear(thread, AST_APC);
465 thread_apc_ast(thread);
466
467 panic("thread_corpse_continue");
468 /*NOTREACHED*/
469 }
470
471 static void
472 thread_terminate_continue(void)
473 {
474 panic("thread_terminate_continue");
475 /*NOTREACHED*/
476 }
477
478 /*
479 * thread_terminate_self:
480 */
481 void
482 thread_terminate_self(void)
483 {
484 thread_t thread = current_thread();
485 task_t task;
486 spl_t s;
487 int threadcnt;
488
489 pal_thread_terminate_self(thread);
490
491 DTRACE_PROC(lwp__exit);
492
493 thread_mtx_lock(thread);
494
495 ipc_thread_disable(thread);
496
497 thread_mtx_unlock(thread);
498
499 s = splsched();
500 thread_lock(thread);
501
502 /*
503 * Cancel priority depression, wait for concurrent expirations
504 * on other processors.
505 */
506 if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
507 thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
508
509 /* If our priority was low because of a depressed yield, restore it in case we block below */
510 thread_recompute_sched_pri(thread, FALSE);
511
512 if (timer_call_cancel(&thread->depress_timer))
513 thread->depress_timer_active--;
514 }
515
516 while (thread->depress_timer_active > 0) {
517 thread_unlock(thread);
518 splx(s);
519
520 delay(1);
521
522 s = splsched();
523 thread_lock(thread);
524 }
525
526 thread_sched_call(thread, NULL);
527
528 thread_unlock(thread);
529 splx(s);
530
531 #if CONFIG_EMBEDDED
532 thead_remove_taskwatch(thread);
533 #endif /* CONFIG_EMBEDDED */
534
535 work_interval_thread_terminate(thread);
536
537 thread_mtx_lock(thread);
538
539 thread_policy_reset(thread);
540
541 thread_mtx_unlock(thread);
542
543 bank_swap_thread_bank_ledger(thread, NULL);
544
545 task = thread->task;
546 uthread_cleanup(task, thread->uthread, task->bsd_info);
547
548 if (task->bsd_info && !task_is_exec_copy(task)) {
549 /* trace out pid before we sign off */
550 long dbg_arg1 = 0;
551 long dbg_arg2 = 0;
552
553 kdbg_trace_data(thread->task->bsd_info, &dbg_arg1, &dbg_arg2);
554
555 KERNEL_DEBUG_CONSTANT(TRACE_DATA_THREAD_TERMINATE_PID | DBG_FUNC_NONE,
556 dbg_arg1, 0, 0, 0, 0);
557 }
558
559 /*
560 * After this subtraction, this thread should never access
561 * task->bsd_info unless it got 0 back from the hw_atomic_sub. It
562 * could be racing with other threads to be the last thread in the
563 * process, and the last thread in the process will tear down the proc
564 * structure and zero-out task->bsd_info.
565 */
566 threadcnt = hw_atomic_sub(&task->active_thread_count, 1);
567
568 /*
569 * If we are the last thread to terminate and the task is
570 * associated with a BSD process, perform BSD process exit.
571 */
572 if (threadcnt == 0 && task->bsd_info != NULL && !task_is_exec_copy(task)) {
573 mach_exception_data_type_t subcode = 0;
574 {
575 /* since we're the last thread in this process, trace out the command name too */
576 long dbg_arg1 = 0, dbg_arg2 = 0, dbg_arg3 = 0, dbg_arg4 = 0;
577
578 kdbg_trace_string(thread->task->bsd_info, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
579
580 KERNEL_DEBUG_CONSTANT(TRACE_STRING_PROC_EXIT | DBG_FUNC_NONE,
581 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
582 }
583
584 /* Get the exit reason before proc_exit */
585 subcode = proc_encode_exit_exception_code(task->bsd_info);
586 proc_exit(task->bsd_info);
587 /*
588 * if there is crash info in task
589 * then do the deliver action since this is
590 * last thread for this task.
591 */
592 if (task->corpse_info) {
593 task_deliver_crash_notification(task, current_thread(), EXC_RESOURCE, subcode);
594 }
595 }
596
597 if (threadcnt == 0) {
598 task_lock(task);
599 if (task_is_a_corpse_fork(task)) {
600 thread_wakeup((event_t)&task->active_thread_count);
601 }
602 task_unlock(task);
603 }
604
605 uthread_cred_free(thread->uthread);
606
607 s = splsched();
608 thread_lock(thread);
609
610 /*
611 * Cancel wait timer, and wait for
612 * concurrent expirations.
613 */
614 if (thread->wait_timer_is_set) {
615 thread->wait_timer_is_set = FALSE;
616
617 if (timer_call_cancel(&thread->wait_timer))
618 thread->wait_timer_active--;
619 }
620
621 while (thread->wait_timer_active > 0) {
622 thread_unlock(thread);
623 splx(s);
624
625 delay(1);
626
627 s = splsched();
628 thread_lock(thread);
629 }
630
631 /*
632 * If there is a reserved stack, release it.
633 */
634 if (thread->reserved_stack != 0) {
635 stack_free_reserved(thread);
636 thread->reserved_stack = 0;
637 }
638
639 /*
640 * Mark thread as terminating, and block.
641 */
642 thread->state |= TH_TERMINATE;
643 thread_mark_wait_locked(thread, THREAD_UNINT);
644 assert((thread->sched_flags & TH_SFLAG_PROMOTED) == 0);
645 assert(thread->promotions == 0);
646 assert(!(thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED));
647 assert(thread->rwlock_count == 0);
648 thread_unlock(thread);
649 /* splsched */
650
651 thread_block((thread_continue_t)thread_terminate_continue);
652 /*NOTREACHED*/
653 }
654
655 /* Drop a thread refcount that definitely isn't the last one. */
656 void
657 thread_deallocate_safe(thread_t thread)
658 {
659 assert_thread_magic(thread);
660
661 uint32_t old_refcount = atomic_fetch_sub_explicit(&thread->ref_count, 1, memory_order_release);
662
663 if (__improbable(old_refcount <= 1))
664 panic("bad thread refcount: %d", old_refcount);
665 }
666
667 void
668 thread_deallocate(
669 thread_t thread)
670 {
671 task_t task;
672 __assert_only uint32_t th_ref_count;
673
674 if (thread == THREAD_NULL)
675 return;
676
677 assert_thread_magic(thread);
678
679 if (__probable(atomic_fetch_sub_explicit(&thread->ref_count, 1,
680 memory_order_release) - 1 > 0)) {
681 return;
682 }
683
684 th_ref_count = atomic_load_explicit(&thread->ref_count, memory_order_acquire);
685 assert(th_ref_count == 0);
686
687 assert(thread_owned_workloops_count(thread) == 0);
688
689 if (!(thread->state & TH_TERMINATE2))
690 panic("thread_deallocate: thread not properly terminated\n");
691
692 assert(thread->runq == PROCESSOR_NULL);
693
694 assert(thread->user_promotions == 0);
695
696 #if KPC
697 kpc_thread_destroy(thread);
698 #endif
699
700 ipc_thread_terminate(thread);
701
702 proc_thread_qos_deallocate(thread);
703
704 task = thread->task;
705
706 #ifdef MACH_BSD
707 {
708 void *ut = thread->uthread;
709
710 thread->uthread = NULL;
711 uthread_zone_free(ut);
712 }
713 #endif /* MACH_BSD */
714
715 if (thread->t_ledger)
716 ledger_dereference(thread->t_ledger);
717 if (thread->t_threadledger)
718 ledger_dereference(thread->t_threadledger);
719
720 if (IPC_VOUCHER_NULL != thread->ith_voucher)
721 ipc_voucher_release(thread->ith_voucher);
722
723 if (thread->thread_io_stats)
724 kfree(thread->thread_io_stats, sizeof(struct io_stat_info));
725
726 if (thread->kernel_stack != 0)
727 stack_free(thread);
728
729 lck_mtx_destroy(&thread->mutex, &thread_lck_grp);
730 machine_thread_destroy(thread);
731
732 task_deallocate(task);
733
734 #if MACH_ASSERT
735 assert_thread_magic(thread);
736 thread->thread_magic = 0;
737 #endif /* MACH_ASSERT */
738
739 zfree(thread_zone, thread);
740 }
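#if 0	/* Illustrative sketch only; not part of the original source. */
/*
 * The generic reference-count teardown pattern used by thread_deallocate()
 * and thread_deallocate_safe() above, shown in isolation: decrement with
 * release ordering so all prior writes by the releasing thread are
 * published before the count can reach zero, then perform an acquire load
 * on the zero path so the deallocating thread observes every other
 * releaser's writes before freeing. The example_* names are hypothetical.
 */
struct example_refcounted {
	_Atomic uint32_t	ref_count;
};

static void
example_release(struct example_refcounted *obj,
    void (*destroy)(struct example_refcounted *))
{
	if (atomic_fetch_sub_explicit(&obj->ref_count, 1, memory_order_release) == 1) {
		(void)atomic_load_explicit(&obj->ref_count, memory_order_acquire);
		destroy(obj);
	}
}
#endif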
741
742 void
743 thread_starts_owning_workloop(thread_t thread)
744 {
745 atomic_fetch_add_explicit(&thread->kqwl_owning_count, 1,
746 memory_order_relaxed);
747 }
748
749 void
750 thread_ends_owning_workloop(thread_t thread)
751 {
752 __assert_only uint32_t count;
753 count = atomic_fetch_sub_explicit(&thread->kqwl_owning_count, 1,
754 memory_order_relaxed);
755 assert(count > 0);
756 }
757
758 uint32_t
759 thread_owned_workloops_count(thread_t thread)
760 {
761 return atomic_load_explicit(&thread->kqwl_owning_count,
762 memory_order_relaxed);
763 }
764
765 /*
766 * thread_inspect_deallocate:
767 *
768 * Drop a thread inspection reference.
769 */
770 void
771 thread_inspect_deallocate(
772 thread_inspect_t thread_inspect)
773 {
774 return(thread_deallocate((thread_t)thread_inspect));
775 }
776
777 /*
778 * thread_exception_daemon:
779 *
780 * Deliver EXC_{RESOURCE,GUARD} exception
781 */
782 static void
783 thread_exception_daemon(void)
784 {
785 struct thread_exception_elt *elt;
786 task_t task;
787 thread_t thread;
788 exception_type_t etype;
789
790 simple_lock(&thread_exception_lock);
791 while ((elt = (struct thread_exception_elt *)dequeue_head(&thread_exception_queue)) != NULL) {
792 simple_unlock(&thread_exception_lock);
793
794 etype = elt->exception_type;
795 task = elt->exception_task;
796 thread = elt->exception_thread;
797 assert_thread_magic(thread);
798
799 kfree(elt, sizeof (*elt));
800
801 /* wait for all the threads in the task to terminate */
802 task_lock(task);
803 task_wait_till_threads_terminate_locked(task);
804 task_unlock(task);
805
806 /* Consumes the task ref returned by task_generate_corpse_internal */
807 task_deallocate(task);
808 /* Consumes the thread ref returned by task_generate_corpse_internal */
809 thread_deallocate(thread);
810
811 /* Deliver the notification, also clears the corpse. */
812 task_deliver_crash_notification(task, thread, etype, 0);
813
814 simple_lock(&thread_exception_lock);
815 }
816
817 assert_wait((event_t)&thread_exception_queue, THREAD_UNINT);
818 simple_unlock(&thread_exception_lock);
819
820 thread_block((thread_continue_t)thread_exception_daemon);
821 }
822
823 /*
824 * thread_exception_enqueue:
825 *
826 * Enqueue a corpse port to be delivered an EXC_{RESOURCE,GUARD}.
827 */
828 void
829 thread_exception_enqueue(
830 task_t task,
831 thread_t thread,
832 exception_type_t etype)
833 {
834 assert(EXC_RESOURCE == etype || EXC_GUARD == etype);
835 struct thread_exception_elt *elt = kalloc(sizeof (*elt));
836 elt->exception_type = etype;
837 elt->exception_task = task;
838 elt->exception_thread = thread;
839
840 simple_lock(&thread_exception_lock);
841 enqueue_tail(&thread_exception_queue, (queue_entry_t)elt);
842 simple_unlock(&thread_exception_lock);
843
844 thread_wakeup((event_t)&thread_exception_queue);
845 }
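#if 0	/* Illustrative sketch only; not part of the original source. */
/*
 * The enqueue/daemon handshake used by thread_exception_queue (and by the
 * terminate and stack queues below), shown in isolation: the producer
 * enqueues under the simple lock and then wakes the queue event; the daemon
 * drains the queue and parks itself with assert_wait() before dropping the
 * lock, so a concurrent enqueue cannot lose the wakeup. The example_* names
 * are hypothetical, and example_lock/example_queue are assumed to have been
 * initialized at boot the same way thread_daemon_init() initializes the
 * real queues.
 */
decl_simple_lock_data(static, example_lock)
static queue_head_t example_queue;

static void
example_enqueue(queue_entry_t elt)
{
	simple_lock(&example_lock);
	enqueue_tail(&example_queue, elt);
	simple_unlock(&example_lock);

	thread_wakeup((event_t)&example_queue);
}

static void
example_daemon(void)
{
	queue_entry_t elt;

	simple_lock(&example_lock);
	while ((elt = dequeue_head(&example_queue)) != NULL) {
		simple_unlock(&example_lock);

		/* ... process elt with the lock dropped ... */

		simple_lock(&example_lock);
	}

	assert_wait((event_t)&example_queue, THREAD_UNINT);
	simple_unlock(&example_lock);

	thread_block((thread_continue_t)example_daemon);
	/*NOTREACHED*/
}
#endif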
846
847 /*
848 * thread_copy_resource_info
849 *
850 * Copy the resource info counters from source
851 * thread to destination thread.
852 */
853 void
854 thread_copy_resource_info(
855 thread_t dst_thread,
856 thread_t src_thread)
857 {
858 dst_thread->thread_tag = src_thread->thread_tag;
859 dst_thread->c_switch = src_thread->c_switch;
860 dst_thread->p_switch = src_thread->p_switch;
861 dst_thread->ps_switch = src_thread->ps_switch;
862 dst_thread->precise_user_kernel_time = src_thread->precise_user_kernel_time;
863 dst_thread->user_timer = src_thread->user_timer;
864 dst_thread->user_timer_save = src_thread->user_timer_save;
865 dst_thread->system_timer = src_thread->system_timer;
866 dst_thread->system_timer_save = src_thread->system_timer_save;
867 dst_thread->vtimer_user_save = src_thread->vtimer_user_save;
868 dst_thread->vtimer_prof_save = src_thread->vtimer_prof_save;
869 dst_thread->vtimer_rlim_save = src_thread->vtimer_rlim_save;
870 dst_thread->vtimer_qos_save = src_thread->vtimer_qos_save;
871 dst_thread->syscalls_unix = src_thread->syscalls_unix;
872 dst_thread->syscalls_mach = src_thread->syscalls_mach;
873 ledger_rollup(dst_thread->t_threadledger, src_thread->t_threadledger);
874 *dst_thread->thread_io_stats = *src_thread->thread_io_stats;
875 }
876
877 /*
878 * thread_terminate_daemon:
879 *
880 * Perform final clean up for terminating threads.
881 */
882 static void
883 thread_terminate_daemon(void)
884 {
885 thread_t self, thread;
886 task_t task;
887
888 self = current_thread();
889 self->options |= TH_OPT_SYSTEM_CRITICAL;
890
891 (void)splsched();
892 simple_lock(&thread_terminate_lock);
893
894 while ((thread = qe_dequeue_head(&thread_terminate_queue, struct thread, runq_links)) != THREAD_NULL) {
895 assert_thread_magic(thread);
896
897 /*
 898 		 * If marked for crash reporting, skip reaping.
 899 		 * The corpse delivery thread will clear the bit and enqueue
 900 		 * the thread for reaping when it is done.
901 */
902 if (thread->inspection){
903 enqueue_tail(&crashed_threads_queue, &thread->runq_links);
904 continue;
905 }
906
907 simple_unlock(&thread_terminate_lock);
908 (void)spllo();
909
910 task = thread->task;
911
912 task_lock(task);
913 task->total_user_time += timer_grab(&thread->user_timer);
914 task->total_ptime += timer_grab(&thread->ptime);
915 if (thread->precise_user_kernel_time) {
916 task->total_system_time += timer_grab(&thread->system_timer);
917 } else {
918 task->total_user_time += timer_grab(&thread->system_timer);
919 }
920
921 task->c_switch += thread->c_switch;
922 task->p_switch += thread->p_switch;
923 task->ps_switch += thread->ps_switch;
924
925 task->syscalls_unix += thread->syscalls_unix;
926 task->syscalls_mach += thread->syscalls_mach;
927
928 task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
929 task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
930 task->task_gpu_ns += ml_gpu_stat(thread);
931 task->task_energy += ml_energy_stat(thread);
932
933 #if MONOTONIC
934 mt_terminate_update(task, thread);
935 #endif /* MONOTONIC */
936
937 thread_update_qos_cpu_time(thread);
938
939 queue_remove(&task->threads, thread, thread_t, task_threads);
940 task->thread_count--;
941
942 /*
943 * If the task is being halted, and there is only one thread
944 * left in the task after this one, then wakeup that thread.
945 */
946 if (task->thread_count == 1 && task->halting)
947 thread_wakeup((event_t)&task->halting);
948
949 task_unlock(task);
950
951 lck_mtx_lock(&tasks_threads_lock);
952 queue_remove(&threads, thread, thread_t, threads);
953 threads_count--;
954 lck_mtx_unlock(&tasks_threads_lock);
955
956 thread_deallocate(thread);
957
958 (void)splsched();
959 simple_lock(&thread_terminate_lock);
960 }
961
962 assert_wait((event_t)&thread_terminate_queue, THREAD_UNINT);
963 simple_unlock(&thread_terminate_lock);
964 /* splsched */
965
966 self->options &= ~TH_OPT_SYSTEM_CRITICAL;
967 thread_block((thread_continue_t)thread_terminate_daemon);
968 /*NOTREACHED*/
969 }
970
971 /*
972 * thread_terminate_enqueue:
973 *
974 * Enqueue a terminating thread for final disposition.
975 *
976 * Called at splsched.
977 */
978 void
979 thread_terminate_enqueue(
980 thread_t thread)
981 {
982 KERNEL_DEBUG_CONSTANT(TRACE_DATA_THREAD_TERMINATE | DBG_FUNC_NONE, thread->thread_id, 0, 0, 0, 0);
983
984 simple_lock(&thread_terminate_lock);
985 enqueue_tail(&thread_terminate_queue, &thread->runq_links);
986 simple_unlock(&thread_terminate_lock);
987
988 thread_wakeup((event_t)&thread_terminate_queue);
989 }
990
991 /*
992 * thread_terminate_crashed_threads:
 993  * Walk the list of crashed threads and move any threads that are
 994  * no longer being inspected back onto the terminate queue.
995 */
996 void
997 thread_terminate_crashed_threads()
998 {
999 thread_t th_remove;
1000 boolean_t should_wake_terminate_queue = FALSE;
1001
1002 simple_lock(&thread_terminate_lock);
1003 /*
1005 	 * Loop through the crashed threads queue and requeue any threads
1006 	 * that are no longer being inspected.
1006 */
1007
1008 qe_foreach_element_safe(th_remove, &crashed_threads_queue, runq_links) {
1009 /* make sure current_thread is never in crashed queue */
1010 assert(th_remove != current_thread());
1011
1012 if (th_remove->inspection == FALSE) {
1013 re_queue_tail(&thread_terminate_queue, &th_remove->runq_links);
1014 should_wake_terminate_queue = TRUE;
1015 }
1016 }
1017
1018 simple_unlock(&thread_terminate_lock);
1019 if (should_wake_terminate_queue == TRUE) {
1020 thread_wakeup((event_t)&thread_terminate_queue);
1021 }
1022 }
1023
1024 /*
1025 * thread_stack_daemon:
1026 *
1027 * Perform stack allocation as required due to
1028 * invoke failures.
1029 */
1030 static void
1031 thread_stack_daemon(void)
1032 {
1033 thread_t thread;
1034 spl_t s;
1035
1036 s = splsched();
1037 simple_lock(&thread_stack_lock);
1038
1039 while ((thread = qe_dequeue_head(&thread_stack_queue, struct thread, runq_links)) != THREAD_NULL) {
1040 assert_thread_magic(thread);
1041
1042 simple_unlock(&thread_stack_lock);
1043 splx(s);
1044
1045 /* allocate stack with interrupts enabled so that we can call into VM */
1046 stack_alloc(thread);
1047
1048 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_END, thread_tid(thread), 0, 0, 0, 0);
1049
1050 s = splsched();
1051 thread_lock(thread);
1052 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1053 thread_unlock(thread);
1054
1055 simple_lock(&thread_stack_lock);
1056 }
1057
1058 assert_wait((event_t)&thread_stack_queue, THREAD_UNINT);
1059 simple_unlock(&thread_stack_lock);
1060 splx(s);
1061
1062 thread_block((thread_continue_t)thread_stack_daemon);
1063 /*NOTREACHED*/
1064 }
1065
1066 /*
1067 * thread_stack_enqueue:
1068 *
1069 * Enqueue a thread for stack allocation.
1070 *
1071 * Called at splsched.
1072 */
1073 void
1074 thread_stack_enqueue(
1075 thread_t thread)
1076 {
1077 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_START, thread_tid(thread), 0, 0, 0, 0);
1078 assert_thread_magic(thread);
1079
1080 simple_lock(&thread_stack_lock);
1081 enqueue_tail(&thread_stack_queue, &thread->runq_links);
1082 simple_unlock(&thread_stack_lock);
1083
1084 thread_wakeup((event_t)&thread_stack_queue);
1085 }
1086
1087 void
1088 thread_daemon_init(void)
1089 {
1090 kern_return_t result;
1091 thread_t thread = NULL;
1092
1093 simple_lock_init(&thread_terminate_lock, 0);
1094 queue_init(&thread_terminate_queue);
1095 queue_init(&crashed_threads_queue);
1096
1097 result = kernel_thread_start_priority((thread_continue_t)thread_terminate_daemon, NULL, MINPRI_KERNEL, &thread);
1098 if (result != KERN_SUCCESS)
1099 panic("thread_daemon_init: thread_terminate_daemon");
1100
1101 thread_deallocate(thread);
1102
1103 simple_lock_init(&thread_stack_lock, 0);
1104 queue_init(&thread_stack_queue);
1105
1106 result = kernel_thread_start_priority((thread_continue_t)thread_stack_daemon, NULL, BASEPRI_PREEMPT_HIGH, &thread);
1107 if (result != KERN_SUCCESS)
1108 panic("thread_daemon_init: thread_stack_daemon");
1109
1110 thread_deallocate(thread);
1111
1112 simple_lock_init(&thread_exception_lock, 0);
1113 queue_init(&thread_exception_queue);
1114
1115 result = kernel_thread_start_priority((thread_continue_t)thread_exception_daemon, NULL, MINPRI_KERNEL, &thread);
1116 if (result != KERN_SUCCESS)
1117 panic("thread_daemon_init: thread_exception_daemon");
1118
1119 thread_deallocate(thread);
1120 }
1121
1122 #define TH_OPTION_NONE 0x00
1123 #define TH_OPTION_NOCRED 0x01
1124 #define TH_OPTION_NOSUSP 0x02
1125
1126 /*
1127 * Create a new thread.
1128 * Doesn't start the thread running.
1129 *
1130 * Task and tasks_threads_lock are returned locked on success.
1131 */
1132 static kern_return_t
1133 thread_create_internal(
1134 task_t parent_task,
1135 integer_t priority,
1136 thread_continue_t continuation,
1137 int options,
1138 thread_t *out_thread)
1139 {
1140 thread_t new_thread;
1141 static thread_t first_thread;
1142
1143 /*
1144 * Allocate a thread and initialize static fields
1145 */
1146 if (first_thread == THREAD_NULL)
1147 new_thread = first_thread = current_thread();
1148 else
1149 new_thread = (thread_t)zalloc(thread_zone);
1150 if (new_thread == THREAD_NULL)
1151 return (KERN_RESOURCE_SHORTAGE);
1152
1153 if (new_thread != first_thread)
1154 *new_thread = thread_template;
1155
1156 #ifdef MACH_BSD
1157 new_thread->uthread = uthread_alloc(parent_task, new_thread, (options & TH_OPTION_NOCRED) != 0);
1158 if (new_thread->uthread == NULL) {
1159 #if MACH_ASSERT
1160 new_thread->thread_magic = 0;
1161 #endif /* MACH_ASSERT */
1162
1163 zfree(thread_zone, new_thread);
1164 return (KERN_RESOURCE_SHORTAGE);
1165 }
1166 #endif /* MACH_BSD */
1167
1168 if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
1169 #ifdef MACH_BSD
1170 void *ut = new_thread->uthread;
1171
1172 new_thread->uthread = NULL;
1173 /* cred free may not be necessary */
1174 uthread_cleanup(parent_task, ut, parent_task->bsd_info);
1175 uthread_cred_free(ut);
1176 uthread_zone_free(ut);
1177 #endif /* MACH_BSD */
1178
1179 #if MACH_ASSERT
1180 new_thread->thread_magic = 0;
1181 #endif /* MACH_ASSERT */
1182
1183 zfree(thread_zone, new_thread);
1184 return (KERN_FAILURE);
1185 }
1186
1187 new_thread->task = parent_task;
1188
1189 thread_lock_init(new_thread);
1190 wake_lock_init(new_thread);
1191
1192 lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr);
1193
1194 ipc_thread_init(new_thread);
1195
1196 new_thread->continuation = continuation;
1197
1198 /* Allocate I/O Statistics structure */
1199 new_thread->thread_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1200 assert(new_thread->thread_io_stats != NULL);
1201 bzero(new_thread->thread_io_stats, sizeof(struct io_stat_info));
1202 new_thread->sync_ipc_overrides = 0;
1203
1204 #if KASAN
1205 kasan_init_thread(&new_thread->kasan_data);
1206 #endif
1207
1208 #if CONFIG_IOSCHED
1209 /* Clear out the I/O Scheduling info for AppleFSCompression */
1210 new_thread->decmp_upl = NULL;
1211 #endif /* CONFIG_IOSCHED */
1212
1213 lck_mtx_lock(&tasks_threads_lock);
1214 task_lock(parent_task);
1215
1216 /*
1217 * Fail thread creation if parent task is being torn down or has too many threads
1218 * If the caller asked for TH_OPTION_NOSUSP, also fail if the parent task is suspended
1219 */
1220 if (parent_task->active == 0 || parent_task->halting ||
1221 (parent_task->suspend_count > 0 && (options & TH_OPTION_NOSUSP) != 0) ||
1222 (parent_task->thread_count >= task_threadmax && parent_task != kernel_task)) {
1223 task_unlock(parent_task);
1224 lck_mtx_unlock(&tasks_threads_lock);
1225
1226 #ifdef MACH_BSD
1227 {
1228 void *ut = new_thread->uthread;
1229
1230 new_thread->uthread = NULL;
1231 uthread_cleanup(parent_task, ut, parent_task->bsd_info);
1232 /* cred free may not be necessary */
1233 uthread_cred_free(ut);
1234 uthread_zone_free(ut);
1235 }
1236 #endif /* MACH_BSD */
1237 ipc_thread_disable(new_thread);
1238 ipc_thread_terminate(new_thread);
1239 kfree(new_thread->thread_io_stats, sizeof(struct io_stat_info));
1240 lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp);
1241 machine_thread_destroy(new_thread);
1242 zfree(thread_zone, new_thread);
1243 return (KERN_FAILURE);
1244 }
1245
1246 /* New threads inherit any default state on the task */
1247 machine_thread_inherit_taskwide(new_thread, parent_task);
1248
1249 task_reference_internal(parent_task);
1250
1251 if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
1252 /*
1253 * This task has a per-thread CPU limit; make sure this new thread
1254 * gets its limit set too, before it gets out of the kernel.
1255 */
1256 set_astledger(new_thread);
1257 }
1258
1259 /* Instantiate a thread ledger. Do not fail thread creation if ledger creation fails. */
1260 if ((new_thread->t_threadledger = ledger_instantiate(thread_ledger_template,
1261 LEDGER_CREATE_INACTIVE_ENTRIES)) != LEDGER_NULL) {
1262
1263 ledger_entry_setactive(new_thread->t_threadledger, thread_ledgers.cpu_time);
1264 }
1265
1266 new_thread->t_bankledger = LEDGER_NULL;
1267 new_thread->t_deduct_bank_ledger_time = 0;
1268 new_thread->t_deduct_bank_ledger_energy = 0;
1269
1270 new_thread->t_ledger = new_thread->task->ledger;
1271 if (new_thread->t_ledger)
1272 ledger_reference(new_thread->t_ledger);
1273
1274 #if defined(CONFIG_SCHED_MULTIQ)
1275 /* Cache the task's sched_group */
1276 new_thread->sched_group = parent_task->sched_group;
1277 #endif /* defined(CONFIG_SCHED_MULTIQ) */
1278
1279 /* Cache the task's map */
1280 new_thread->map = parent_task->map;
1281
1282 timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
1283 timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);
1284
1285 #if KPC
1286 kpc_thread_create(new_thread);
1287 #endif
1288
1289 /* Set the thread's scheduling parameters */
1290 new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
1291 new_thread->max_priority = parent_task->max_priority;
1292 new_thread->task_priority = parent_task->priority;
1293
1294 	int new_priority = (priority < 0) ? parent_task->priority : priority;
1296 if (new_priority > new_thread->max_priority)
1297 new_priority = new_thread->max_priority;
1298 #if CONFIG_EMBEDDED
1299 if (new_priority < MAXPRI_THROTTLE) {
1300 new_priority = MAXPRI_THROTTLE;
1301 }
1302 #endif /* CONFIG_EMBEDDED */
1303
1304 new_thread->importance = new_priority - new_thread->task_priority;
1305
1306 sched_set_thread_base_priority(new_thread, new_priority);
1307
1308 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
1309 new_thread->sched_stamp = sched_tick;
1310 new_thread->pri_shift = sched_pri_shifts[new_thread->th_sched_bucket];
1311 #endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) */
1312
1313 #if CONFIG_EMBEDDED
1314 if (parent_task->max_priority <= MAXPRI_THROTTLE)
1315 sched_thread_mode_demote(new_thread, TH_SFLAG_THROTTLED);
1316 #endif /* CONFIG_EMBEDDED */
1317
1318 thread_policy_create(new_thread);
1319
1320 /* Chain the thread onto the task's list */
1321 queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
1322 parent_task->thread_count++;
1323
1324 /* So terminating threads don't need to take the task lock to decrement */
1325 hw_atomic_add(&parent_task->active_thread_count, 1);
1326
1327 /* Protected by the tasks_threads_lock */
1328 new_thread->thread_id = ++thread_unique_id;
1329
1330
1331 queue_enter(&threads, new_thread, thread_t, threads);
1332 threads_count++;
1333
1334 new_thread->active = TRUE;
1335 if (task_is_a_corpse_fork(parent_task)) {
1336 /* Set the inspection bit if the task is a corpse fork */
1337 new_thread->inspection = TRUE;
1338 } else {
1339 new_thread->inspection = FALSE;
1340 }
1341 new_thread->corpse_dup = FALSE;
1342 *out_thread = new_thread;
1343
1344 {
1345 long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;
1346
1347 kdbg_trace_data(parent_task->bsd_info, &dbg_arg2, &dbg_arg4);
1348
1349 /*
1350 * Starting with 26604425, exec'ing creates a new task/thread.
1351 *
1352 * NEWTHREAD in the current process has two possible meanings:
1353 *
1354 * 1) Create a new thread for this process.
1355 * 2) Create a new thread for the future process this will become in an exec.
1356 *
1357 * To disambiguate these, arg3 will be set to TRUE for case #2.
1358 *
1359 * The value we need to find (TPF_EXEC_COPY) is stable in the case of a
1360 * task exec'ing. The read of t_procflags does not take the proc_lock.
1361 */
1362 dbg_arg3 = (task_is_exec_copy(parent_task)) ? TRUE : 0;
1363
1364
1365 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1366 TRACE_DATA_NEWTHREAD | DBG_FUNC_NONE,
1367 (vm_address_t)(uintptr_t)thread_tid(new_thread), dbg_arg2, dbg_arg3, dbg_arg4, 0);
1368
1369 kdbg_trace_string(parent_task->bsd_info,
1370 &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
1371
1372 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1373 TRACE_STRING_NEWTHREAD | DBG_FUNC_NONE,
1374 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
1375 }
1376
1377 DTRACE_PROC1(lwp__create, thread_t, *out_thread);
1378
1379 return (KERN_SUCCESS);
1380 }
1381
1382 static kern_return_t
1383 thread_create_internal2(
1384 task_t task,
1385 thread_t *new_thread,
1386 boolean_t from_user,
1387 thread_continue_t continuation)
1388 {
1389 kern_return_t result;
1390 thread_t thread;
1391
1392 if (task == TASK_NULL || task == kernel_task)
1393 return (KERN_INVALID_ARGUMENT);
1394
1395 result = thread_create_internal(task, -1, continuation, TH_OPTION_NONE, &thread);
1396 if (result != KERN_SUCCESS)
1397 return (result);
1398
1399 thread->user_stop_count = 1;
1400 thread_hold(thread);
1401 if (task->suspend_count > 0)
1402 thread_hold(thread);
1403
1404 if (from_user)
1405 extmod_statistics_incr_thread_create(task);
1406
1407 task_unlock(task);
1408 lck_mtx_unlock(&tasks_threads_lock);
1409
1410 *new_thread = thread;
1411
1412 return (KERN_SUCCESS);
1413 }
1414
1415 /* No prototype, since task_server.h has the _from_user version if KERNEL_SERVER */
1416 kern_return_t
1417 thread_create(
1418 task_t task,
1419 thread_t *new_thread);
1420
1421 kern_return_t
1422 thread_create(
1423 task_t task,
1424 thread_t *new_thread)
1425 {
1426 return thread_create_internal2(task, new_thread, FALSE, (thread_continue_t)thread_bootstrap_return);
1427 }
1428
1429 kern_return_t
1430 thread_create_from_user(
1431 task_t task,
1432 thread_t *new_thread)
1433 {
1434 return thread_create_internal2(task, new_thread, TRUE, (thread_continue_t)thread_bootstrap_return);
1435 }
1436
1437 kern_return_t
1438 thread_create_with_continuation(
1439 task_t task,
1440 thread_t *new_thread,
1441 thread_continue_t continuation)
1442 {
1443 return thread_create_internal2(task, new_thread, FALSE, continuation);
1444 }
1445
1446 /*
1447 * Create a thread that is already started, but is waiting on an event
1448 */
1449 static kern_return_t
1450 thread_create_waiting_internal(
1451 task_t task,
1452 thread_continue_t continuation,
1453 event_t event,
1454 int options,
1455 thread_t *new_thread)
1456 {
1457 kern_return_t result;
1458 thread_t thread;
1459
1460 if (task == TASK_NULL || task == kernel_task)
1461 return (KERN_INVALID_ARGUMENT);
1462
1463 result = thread_create_internal(task, -1, continuation, options, &thread);
1464 if (result != KERN_SUCCESS)
1465 return (result);
1466
1467 /* note no user_stop_count or thread_hold here */
1468
1469 if (task->suspend_count > 0)
1470 thread_hold(thread);
1471
1472 thread_mtx_lock(thread);
1473 thread_start_in_assert_wait(thread, event, THREAD_INTERRUPTIBLE);
1474 thread_mtx_unlock(thread);
1475
1476 task_unlock(task);
1477 lck_mtx_unlock(&tasks_threads_lock);
1478
1479 *new_thread = thread;
1480
1481 return (KERN_SUCCESS);
1482 }
1483
1484 kern_return_t
1485 thread_create_waiting(
1486 task_t task,
1487 thread_continue_t continuation,
1488 event_t event,
1489 thread_t *new_thread)
1490 {
1491 return thread_create_waiting_internal(task, continuation, event,
1492 TH_OPTION_NONE, new_thread);
1493 }
1494
1495
1496 static kern_return_t
1497 thread_create_running_internal2(
1498 task_t task,
1499 int flavor,
1500 thread_state_t new_state,
1501 mach_msg_type_number_t new_state_count,
1502 thread_t *new_thread,
1503 boolean_t from_user)
1504 {
1505 kern_return_t result;
1506 thread_t thread;
1507
1508 if (task == TASK_NULL || task == kernel_task)
1509 return (KERN_INVALID_ARGUMENT);
1510
1511 result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, TH_OPTION_NONE, &thread);
1512 if (result != KERN_SUCCESS)
1513 return (result);
1514
1515 if (task->suspend_count > 0)
1516 thread_hold(thread);
1517
1518 result = machine_thread_set_state(thread, flavor, new_state, new_state_count);
1519 if (result != KERN_SUCCESS) {
1520 task_unlock(task);
1521 lck_mtx_unlock(&tasks_threads_lock);
1522
1523 thread_terminate(thread);
1524 thread_deallocate(thread);
1525 return (result);
1526 }
1527
1528 thread_mtx_lock(thread);
1529 thread_start(thread);
1530 thread_mtx_unlock(thread);
1531
1532 if (from_user)
1533 extmod_statistics_incr_thread_create(task);
1534
1535 task_unlock(task);
1536 lck_mtx_unlock(&tasks_threads_lock);
1537
1538 *new_thread = thread;
1539
1540 return (result);
1541 }
1542
1543 /* Prototype, see justification above */
1544 kern_return_t
1545 thread_create_running(
1546 task_t task,
1547 int flavor,
1548 thread_state_t new_state,
1549 mach_msg_type_number_t new_state_count,
1550 thread_t *new_thread);
1551
1552 kern_return_t
1553 thread_create_running(
1554 task_t task,
1555 int flavor,
1556 thread_state_t new_state,
1557 mach_msg_type_number_t new_state_count,
1558 thread_t *new_thread)
1559 {
1560 return thread_create_running_internal2(
1561 task, flavor, new_state, new_state_count,
1562 new_thread, FALSE);
1563 }
1564
1565 kern_return_t
1566 thread_create_running_from_user(
1567 task_t task,
1568 int flavor,
1569 thread_state_t new_state,
1570 mach_msg_type_number_t new_state_count,
1571 thread_t *new_thread)
1572 {
1573 return thread_create_running_internal2(
1574 task, flavor, new_state, new_state_count,
1575 new_thread, TRUE);
1576 }
1577
1578 kern_return_t
1579 thread_create_workq(
1580 task_t task,
1581 thread_continue_t thread_return,
1582 thread_t *new_thread)
1583 {
1584 kern_return_t result;
1585 thread_t thread;
1586
1587 if (task == TASK_NULL || task == kernel_task)
1588 return (KERN_INVALID_ARGUMENT);
1589
1590 result = thread_create_internal(task, -1, thread_return, TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread);
1591 if (result != KERN_SUCCESS)
1592 return (result);
1593
1594 thread->user_stop_count = 1;
1595 thread_hold(thread);
1596 if (task->suspend_count > 0)
1597 thread_hold(thread);
1598
1599 task_unlock(task);
1600 lck_mtx_unlock(&tasks_threads_lock);
1601
1602 *new_thread = thread;
1603
1604 return (KERN_SUCCESS);
1605 }
1606
1607 kern_return_t
1608 thread_create_workq_waiting(
1609 task_t task,
1610 thread_continue_t continuation,
1611 event_t event,
1612 thread_t *new_thread)
1613 {
1614
1615 return thread_create_waiting_internal(task, continuation, event,
1616 TH_OPTION_NOCRED | TH_OPTION_NOSUSP,
1617 new_thread);
1618 }
1619
1620 /*
1621 * kernel_thread_create:
1622 *
1623 * Create a thread in the kernel task
1624 * to execute in kernel context.
1625 */
1626 kern_return_t
1627 kernel_thread_create(
1628 thread_continue_t continuation,
1629 void *parameter,
1630 integer_t priority,
1631 thread_t *new_thread)
1632 {
1633 kern_return_t result;
1634 thread_t thread;
1635 task_t task = kernel_task;
1636
1637 result = thread_create_internal(task, priority, continuation, TH_OPTION_NOCRED | TH_OPTION_NONE, &thread);
1638 if (result != KERN_SUCCESS)
1639 return (result);
1640
1641 task_unlock(task);
1642 lck_mtx_unlock(&tasks_threads_lock);
1643
1644 stack_alloc(thread);
1645 assert(thread->kernel_stack != 0);
1646 #if CONFIG_EMBEDDED
1647 if (priority > BASEPRI_KERNEL)
1648 #endif
1649 thread->reserved_stack = thread->kernel_stack;
1650
1651 thread->parameter = parameter;
1652
1653 if(debug_task & 1)
1654 kprintf("kernel_thread_create: thread = %p continuation = %p\n", thread, continuation);
1655 *new_thread = thread;
1656
1657 return (result);
1658 }
1659
1660 kern_return_t
1661 kernel_thread_start_priority(
1662 thread_continue_t continuation,
1663 void *parameter,
1664 integer_t priority,
1665 thread_t *new_thread)
1666 {
1667 kern_return_t result;
1668 thread_t thread;
1669
1670 result = kernel_thread_create(continuation, parameter, priority, &thread);
1671 if (result != KERN_SUCCESS)
1672 return (result);
1673
1674 *new_thread = thread;
1675
1676 thread_mtx_lock(thread);
1677 thread_start(thread);
1678 thread_mtx_unlock(thread);
1679
1680 return (result);
1681 }
1682
1683 kern_return_t
1684 kernel_thread_start(
1685 thread_continue_t continuation,
1686 void *parameter,
1687 thread_t *new_thread)
1688 {
1689 return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
1690 }
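#if 0	/* Illustrative sketch only; not part of the original source. */
/*
 * Typical use of kernel_thread_start(): spawn a kernel-context worker and
 * drop the reference returned through the out parameter once the caller no
 * longer needs the handle. The worker exits by terminating itself rather
 * than returning from its continuation. The example_* names and the
 * parameter value are hypothetical.
 */
static void
example_worker(void *parameter, __unused wait_result_t wresult)
{
	int *value = (int *)parameter;

	kprintf("example_worker: parameter = %d\n", *value);

	thread_terminate(current_thread());
	/*NOTREACHED*/
}

static int example_arg = 42;

static void
example_spawn_worker(void)
{
	thread_t worker;

	if (kernel_thread_start(example_worker, &example_arg, &worker) == KERN_SUCCESS) {
		/* kernel_thread_start() returns a reference; drop it when done with the handle. */
		thread_deallocate(worker);
	}
}
#endif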
1691
1692 /* Separated into helper function so it can be used by THREAD_BASIC_INFO and THREAD_EXTENDED_INFO */
1693 /* it is assumed that the thread is locked by the caller */
1694 static void
1695 retrieve_thread_basic_info(thread_t thread, thread_basic_info_t basic_info)
1696 {
1697 int state, flags;
1698
1699 /* fill in info */
1700
1701 thread_read_times(thread, &basic_info->user_time,
1702 &basic_info->system_time);
1703
1704 /*
1705 * Update lazy-evaluated scheduler info because someone wants it.
1706 */
1707 if (SCHED(can_update_priority)(thread))
1708 SCHED(update_priority)(thread);
1709
1710 basic_info->sleep_time = 0;
1711
1712 /*
1713 * To calculate cpu_usage, first correct for timer rate,
1714 * then for 5/8 ageing. The correction factor [3/5] is
1715 * (1/(5/8) - 1).
1716 */
1717 basic_info->cpu_usage = 0;
1718 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
1719 if (sched_tick_interval) {
1720 basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
1721 * TH_USAGE_SCALE) / sched_tick_interval);
1722 basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
1723 }
1724 #endif
1725
1726 if (basic_info->cpu_usage > TH_USAGE_SCALE)
1727 basic_info->cpu_usage = TH_USAGE_SCALE;
1728
1729 basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
1730 POLICY_TIMESHARE: POLICY_RR);
1731
1732 flags = 0;
1733 if (thread->options & TH_OPT_IDLE_THREAD)
1734 flags |= TH_FLAGS_IDLE;
1735
1736 if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
1737 flags |= TH_FLAGS_GLOBAL_FORCED_IDLE;
1738 }
1739
1740 if (!thread->kernel_stack)
1741 flags |= TH_FLAGS_SWAPPED;
1742
1743 state = 0;
1744 if (thread->state & TH_TERMINATE)
1745 state = TH_STATE_HALTED;
1746 else
1747 if (thread->state & TH_RUN)
1748 state = TH_STATE_RUNNING;
1749 else
1750 if (thread->state & TH_UNINT)
1751 state = TH_STATE_UNINTERRUPTIBLE;
1752 else
1753 if (thread->state & TH_SUSP)
1754 state = TH_STATE_STOPPED;
1755 else
1756 if (thread->state & TH_WAIT)
1757 state = TH_STATE_WAITING;
1758
1759 basic_info->run_state = state;
1760 basic_info->flags = flags;
1761
1762 basic_info->suspend_count = thread->user_stop_count;
1763
1764 return;
1765 }
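#if 0	/* Illustrative sketch only; not part of the original source. */
/*
 * Worked example of the cpu_usage scaling performed above, factored out
 * with assumed inputs: if a thread has accumulated half a sched tick worth
 * of aged usage, the timer-rate correction yields TH_USAGE_SCALE / 2, and
 * multiplying by the 3/5 ageing correction (1/(5/8) - 1 == 3/5) gives
 * roughly 30% of TH_USAGE_SCALE. The helper name is hypothetical.
 */
static integer_t
example_scaled_cpu_usage(uint64_t aged_usage, uint64_t tick_interval)
{
	integer_t usage;

	usage = (integer_t)((aged_usage * TH_USAGE_SCALE) / tick_interval);
	usage = (usage * 3) / 5;

	return (usage > TH_USAGE_SCALE) ? TH_USAGE_SCALE : usage;
}
#endif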
1766
1767 kern_return_t
1768 thread_info_internal(
1769 thread_t thread,
1770 thread_flavor_t flavor,
1771 thread_info_t thread_info_out, /* ptr to OUT array */
1772 mach_msg_type_number_t *thread_info_count) /*IN/OUT*/
1773 {
1774 spl_t s;
1775
1776 if (thread == THREAD_NULL)
1777 return (KERN_INVALID_ARGUMENT);
1778
1779 if (flavor == THREAD_BASIC_INFO) {
1780
1781 if (*thread_info_count < THREAD_BASIC_INFO_COUNT)
1782 return (KERN_INVALID_ARGUMENT);
1783
1784 s = splsched();
1785 thread_lock(thread);
1786
1787 retrieve_thread_basic_info(thread, (thread_basic_info_t) thread_info_out);
1788
1789 thread_unlock(thread);
1790 splx(s);
1791
1792 *thread_info_count = THREAD_BASIC_INFO_COUNT;
1793
1794 return (KERN_SUCCESS);
1795 }
1796 else
1797 if (flavor == THREAD_IDENTIFIER_INFO) {
1798 thread_identifier_info_t identifier_info;
1799
1800 if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT)
1801 return (KERN_INVALID_ARGUMENT);
1802
1803 identifier_info = (thread_identifier_info_t) thread_info_out;
1804
1805 s = splsched();
1806 thread_lock(thread);
1807
1808 identifier_info->thread_id = thread->thread_id;
1809 identifier_info->thread_handle = thread->machine.cthread_self;
1810 identifier_info->dispatch_qaddr = thread_dispatchqaddr(thread);
1811
1812 thread_unlock(thread);
1813 splx(s);
1814 return KERN_SUCCESS;
1815 }
1816 else
1817 if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
1818 policy_timeshare_info_t ts_info;
1819
1820 if (*thread_info_count < POLICY_TIMESHARE_INFO_COUNT)
1821 return (KERN_INVALID_ARGUMENT);
1822
1823 ts_info = (policy_timeshare_info_t)thread_info_out;
1824
1825 s = splsched();
1826 thread_lock(thread);
1827
1828 if (thread->sched_mode != TH_MODE_TIMESHARE) {
1829 thread_unlock(thread);
1830 splx(s);
1831 return (KERN_INVALID_POLICY);
1832 }
1833
1834 ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
1835 if (ts_info->depressed) {
1836 ts_info->base_priority = DEPRESSPRI;
1837 ts_info->depress_priority = thread->base_pri;
1838 }
1839 else {
1840 ts_info->base_priority = thread->base_pri;
1841 ts_info->depress_priority = -1;
1842 }
1843
1844 ts_info->cur_priority = thread->sched_pri;
1845 ts_info->max_priority = thread->max_priority;
1846
1847 thread_unlock(thread);
1848 splx(s);
1849
1850 *thread_info_count = POLICY_TIMESHARE_INFO_COUNT;
1851
1852 return (KERN_SUCCESS);
1853 }
1854 else
1855 if (flavor == THREAD_SCHED_FIFO_INFO) {
1856 if (*thread_info_count < POLICY_FIFO_INFO_COUNT)
1857 return (KERN_INVALID_ARGUMENT);
1858
1859 return (KERN_INVALID_POLICY);
1860 }
1861 else
1862 if (flavor == THREAD_SCHED_RR_INFO) {
1863 policy_rr_info_t rr_info;
1864 uint32_t quantum_time;
1865 uint64_t quantum_ns;
1866
1867 if (*thread_info_count < POLICY_RR_INFO_COUNT)
1868 return (KERN_INVALID_ARGUMENT);
1869
1870 rr_info = (policy_rr_info_t) thread_info_out;
1871
1872 s = splsched();
1873 thread_lock(thread);
1874
1875 if (thread->sched_mode == TH_MODE_TIMESHARE) {
1876 thread_unlock(thread);
1877 splx(s);
1878
1879 return (KERN_INVALID_POLICY);
1880 }
1881
1882 rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
1883 if (rr_info->depressed) {
1884 rr_info->base_priority = DEPRESSPRI;
1885 rr_info->depress_priority = thread->base_pri;
1886 }
1887 else {
1888 rr_info->base_priority = thread->base_pri;
1889 rr_info->depress_priority = -1;
1890 }
1891
1892 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
1893 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
1894
1895 rr_info->max_priority = thread->max_priority;
1896 rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
1897
1898 thread_unlock(thread);
1899 splx(s);
1900
1901 *thread_info_count = POLICY_RR_INFO_COUNT;
1902
1903 return (KERN_SUCCESS);
1904 }
1905 else
1906 if (flavor == THREAD_EXTENDED_INFO) {
1907 thread_basic_info_data_t basic_info;
1908 thread_extended_info_t extended_info = (thread_extended_info_t) thread_info_out;
1909
1910 if (*thread_info_count < THREAD_EXTENDED_INFO_COUNT) {
1911 return (KERN_INVALID_ARGUMENT);
1912 }
1913
1914 s = splsched();
1915 thread_lock(thread);
1916
1917 /* NOTE: This mimics fill_taskthreadinfo(), which is the function used by proc_pidinfo() for
1918 * the PROC_PIDTHREADINFO flavor (which can't be used on corpses)
1919 */
1920 retrieve_thread_basic_info(thread, &basic_info);
1921 extended_info->pth_user_time = ((basic_info.user_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.user_time.microseconds * (integer_t)NSEC_PER_USEC));
1922 extended_info->pth_system_time = ((basic_info.system_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.system_time.microseconds * (integer_t)NSEC_PER_USEC));
1923
1924 extended_info->pth_cpu_usage = basic_info.cpu_usage;
1925 extended_info->pth_policy = basic_info.policy;
1926 extended_info->pth_run_state = basic_info.run_state;
1927 extended_info->pth_flags = basic_info.flags;
1928 extended_info->pth_sleep_time = basic_info.sleep_time;
1929 extended_info->pth_curpri = thread->sched_pri;
1930 extended_info->pth_priority = thread->base_pri;
1931 extended_info->pth_maxpriority = thread->max_priority;
1932
1933 bsd_getthreadname(thread->uthread, extended_info->pth_name);
1934
1935 thread_unlock(thread);
1936 splx(s);
1937
1938 *thread_info_count = THREAD_EXTENDED_INFO_COUNT;
1939
1940 return (KERN_SUCCESS);
1941 }
1942 else
1943 if (flavor == THREAD_DEBUG_INFO_INTERNAL) {
1944 #if DEVELOPMENT || DEBUG
1945 thread_debug_info_internal_t dbg_info;
1946 if (*thread_info_count < THREAD_DEBUG_INFO_INTERNAL_COUNT)
1947 return (KERN_NOT_SUPPORTED);
1948
1949 if (thread_info_out == NULL)
1950 return (KERN_INVALID_ARGUMENT);
1951
1952 dbg_info = (thread_debug_info_internal_t) thread_info_out;
1953 dbg_info->page_creation_count = thread->t_page_creation_count;
1954
1955 *thread_info_count = THREAD_DEBUG_INFO_INTERNAL_COUNT;
1956 return (KERN_SUCCESS);
1957 #endif /* DEVELOPMENT || DEBUG */
1958 return (KERN_NOT_SUPPORTED);
1959 }
1960
1961 return (KERN_INVALID_ARGUMENT);
1962 }
1963
1964 void
1965 thread_read_times(
1966 thread_t thread,
1967 time_value_t *user_time,
1968 time_value_t *system_time)
1969 {
1970 clock_sec_t secs;
1971 clock_usec_t usecs;
1972 uint64_t tval_user, tval_system;
1973
1974 tval_user = timer_grab(&thread->user_timer);
1975 tval_system = timer_grab(&thread->system_timer);
1976
1977 if (thread->precise_user_kernel_time) {
1978 absolutetime_to_microtime(tval_user, &secs, &usecs);
1979 user_time->seconds = (typeof(user_time->seconds))secs;
1980 user_time->microseconds = usecs;
1981
1982 absolutetime_to_microtime(tval_system, &secs, &usecs);
1983 system_time->seconds = (typeof(system_time->seconds))secs;
1984 system_time->microseconds = usecs;
1985 } else {
1986 /* system_timer may represent either sys or user */
1987 tval_user += tval_system;
1988 absolutetime_to_microtime(tval_user, &secs, &usecs);
1989 user_time->seconds = (typeof(user_time->seconds))secs;
1990 user_time->microseconds = usecs;
1991
1992 system_time->seconds = 0;
1993 system_time->microseconds = 0;
1994 }
1995 }
1996
1997 uint64_t thread_get_runtime_self(void)
1998 {
1999 boolean_t interrupt_state;
2000 uint64_t runtime;
2001 thread_t thread = NULL;
2002 processor_t processor = NULL;
2003
2004 thread = current_thread();
2005
2006 /* Not interrupt safe, as the scheduler may otherwise update timer values underneath us */
2007 interrupt_state = ml_set_interrupts_enabled(FALSE);
2008 processor = current_processor();
2009 timer_switch(PROCESSOR_DATA(processor, thread_timer), mach_absolute_time(), PROCESSOR_DATA(processor, thread_timer));
2010 runtime = (timer_grab(&thread->user_timer) + timer_grab(&thread->system_timer));
2011 ml_set_interrupts_enabled(interrupt_state);
2012
2013 return runtime;
2014 }
2015
2016 kern_return_t
2017 thread_assign(
2018 __unused thread_t thread,
2019 __unused processor_set_t new_pset)
2020 {
2021 return (KERN_FAILURE);
2022 }
2023
2024 /*
2025 * thread_assign_default:
2026 *
2027 * Special version of thread_assign for assigning threads to default
2028 * processor set.
2029 */
2030 kern_return_t
2031 thread_assign_default(
2032 thread_t thread)
2033 {
2034 return (thread_assign(thread, &pset0));
2035 }
2036
2037 /*
2038 * thread_get_assignment
2039 *
2040 * Return current assignment for this thread.
2041 */
2042 kern_return_t
2043 thread_get_assignment(
2044 thread_t thread,
2045 processor_set_t *pset)
2046 {
2047 if (thread == NULL)
2048 return (KERN_INVALID_ARGUMENT);
2049
2050 *pset = &pset0;
2051
2052 return (KERN_SUCCESS);
2053 }
2054
2055 /*
2056 * thread_wire_internal:
2057 *
2058 * Specify that the target thread must always be able
2059 * to run and to allocate memory.
2060 */
2061 kern_return_t
2062 thread_wire_internal(
2063 host_priv_t host_priv,
2064 thread_t thread,
2065 boolean_t wired,
2066 boolean_t *prev_state)
2067 {
2068 if (host_priv == NULL || thread != current_thread())
2069 return (KERN_INVALID_ARGUMENT);
2070
2071 assert(host_priv == &realhost);
2072
2073 if (prev_state)
2074 *prev_state = (thread->options & TH_OPT_VMPRIV) != 0;
2075
2076 if (wired) {
2077 if (!(thread->options & TH_OPT_VMPRIV))
2078 vm_page_free_reserve(1); /* XXX */
2079 thread->options |= TH_OPT_VMPRIV;
2080 }
2081 else {
2082 if (thread->options & TH_OPT_VMPRIV)
2083 vm_page_free_reserve(-1); /* XXX */
2084 thread->options &= ~TH_OPT_VMPRIV;
2085 }
2086
2087 return (KERN_SUCCESS);
2088 }
2089
2090
2091 /*
2092 * thread_wire:
2093 *
2094 * User-api wrapper for thread_wire_internal()
2095 */
2096 kern_return_t
2097 thread_wire(
2098 host_priv_t host_priv,
2099 thread_t thread,
2100 boolean_t wired)
2101 {
2102 return (thread_wire_internal(host_priv, thread, wired, NULL));
2103 }
2104
2105
2106 boolean_t
2107 is_vm_privileged(void)
2108 {
2109 return current_thread()->options & TH_OPT_VMPRIV ? TRUE : FALSE;
2110 }
2111
2112 boolean_t
2113 set_vm_privilege(boolean_t privileged)
2114 {
2115 boolean_t was_vmpriv;
2116
2117 if (current_thread()->options & TH_OPT_VMPRIV)
2118 was_vmpriv = TRUE;
2119 else
2120 was_vmpriv = FALSE;
2121
2122 if (privileged != FALSE)
2123 current_thread()->options |= TH_OPT_VMPRIV;
2124 else
2125 current_thread()->options &= ~TH_OPT_VMPRIV;
2126
2127 return (was_vmpriv);
2128 }
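/*
 * Usage sketch (illustrative only, not from the original source): the
 * save/restore pattern these two helpers support. The allocation in the
 * middle is hypothetical.
 *
 *	boolean_t was_vmpriv = set_vm_privilege(TRUE);
 *	... perform an allocation that may need the reserved page pool ...
 *	set_vm_privilege(was_vmpriv);
 */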
2129
2130 void
2131 set_thread_rwlock_boost(void)
2132 {
2133 current_thread()->rwlock_count++;
2134 }
2135
2136 void
2137 clear_thread_rwlock_boost(void)
2138 {
2139 thread_t thread = current_thread();
2140
2141 if ((thread->rwlock_count-- == 1) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2142
2143 lck_rw_clear_promotion(thread);
2144 }
2145 }
2146
2147
2148 /*
2149 * XXX assuming current thread only, for now...
2150 */
2151 void
2152 thread_guard_violation(thread_t thread,
2153 mach_exception_data_type_t code, mach_exception_data_type_t subcode)
2154 {
2155 assert(thread == current_thread());
2156 assert(thread->task != kernel_task);
2157
2158 spl_t s = splsched();
2159 /*
2160 * Use the saved state area of the thread structure
2161 * to store all info required to handle the AST when
2162 * returning to userspace
2163 */
2164 assert(EXC_GUARD_DECODE_GUARD_TYPE(code));
2165 thread->guard_exc_info.code = code;
2166 thread->guard_exc_info.subcode = subcode;
2167 thread_ast_set(thread, AST_GUARD);
2168 ast_propagate(thread);
2169
2170 splx(s);
2171 }
2172
2173 /*
2174 * guard_ast:
2175 *
2176 * Handle AST_GUARD for a thread. This routine looks at the
2177 * state saved in the thread structure to determine the cause
2178 * of this exception. Based on this value, it invokes the
2179 * appropriate routine which determines other exception related
2180 * info and raises the exception.
2181 */
2182 void
2183 guard_ast(thread_t t)
2184 {
2185 const mach_exception_data_type_t
2186 code = t->guard_exc_info.code,
2187 subcode = t->guard_exc_info.subcode;
2188
2189 switch (EXC_GUARD_DECODE_GUARD_TYPE(code)) {
2190 case GUARD_TYPE_MACH_PORT:
2191 mach_port_guard_ast(t, code, subcode);
2192 break;
2193 case GUARD_TYPE_FD:
2194 fd_guard_ast(t, code, subcode);
2195 break;
2196 #if CONFIG_VNGUARD
2197 case GUARD_TYPE_VN:
2198 vn_guard_ast(t, code, subcode);
2199 break;
2200 #endif
2201 default:
2202 panic("guard_exc_info %llx %llx", code, subcode);
2203 }
2204 }
2205
2206 static void
2207 thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1)
2208 {
2209 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
2210 #if CONFIG_TELEMETRY
2211 /*
2212 * This thread is in danger of violating the CPU usage monitor. Enable telemetry
2213 * on the entire task so there are micro-stackshots available if and when
2214 * EXC_RESOURCE is triggered. We could have chosen to enable micro-stackshots
2215 * for this thread only; but now that this task is suspect, knowing what all of
2216 * its threads are up to will be useful.
2217 */
2218 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1);
2219 #endif
2220 return;
2221 }
2222
2223 #if CONFIG_TELEMETRY
2224 /*
2225 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
2226 * exceeded the limit, turn telemetry off for the task.
2227 */
2228 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0);
2229 #endif
2230
2231 if (warning == 0) {
2232 SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU();
2233 }
2234 }
2235
2236 void __attribute__((noinline))
2237 SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void)
2238 {
2239 int pid = 0;
2240 task_t task = current_task();
2241 thread_t thread = current_thread();
2242 uint64_t tid = thread->thread_id;
2243 const char *procname = "unknown";
2244 time_value_t thread_total_time = {0, 0};
2245 time_value_t thread_system_time;
2246 time_value_t thread_user_time;
2247 int action;
2248 uint8_t percentage;
2249 uint32_t usage_percent = 0;
2250 uint32_t interval_sec;
2251 uint64_t interval_ns;
2252 uint64_t balance_ns;
2253 boolean_t fatal = FALSE;
2254 boolean_t send_exc_resource = TRUE; /* in addition to RESOURCE_NOTIFY */
2255 kern_return_t kr;
2256
2257 #ifdef EXC_RESOURCE_MONITORS
2258 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
2259 #endif /* EXC_RESOURCE_MONITORS */
2260 struct ledger_entry_info lei;
2261
2262 assert(thread->t_threadledger != LEDGER_NULL);
2263
2264 /*
2265 * Extract the fatal bit and suspend the monitor (which clears the bit).
2266 */
2267 task_lock(task);
2268 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
2269 fatal = TRUE;
2270 send_exc_resource = TRUE;
2271 }
2272 /* Only one thread can be here at a time. Whichever makes it through
2273 first will successfully suspend the monitor and proceed to send the
2274 notification. Other threads will get an error trying to suspend the
2275 monitor and give up on sending the notification. In the first release,
2276 the monitor won't be resumed for a number of seconds, but we may
2277 eventually need to handle low-latency resume.
2278 */
2279 kr = task_suspend_cpumon(task);
2280 task_unlock(task);
2281 if (kr == KERN_INVALID_ARGUMENT) return;
2282
2283 #ifdef MACH_BSD
2284 pid = proc_selfpid();
2285 if (task->bsd_info != NULL) {
2286 procname = proc_name_address(task->bsd_info);
2287 }
2288 #endif
2289
2290 thread_get_cpulimit(&action, &percentage, &interval_ns);
2291
2292 interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);
2293
2294 thread_read_times(thread, &thread_user_time, &thread_system_time);
2295 time_value_add(&thread_total_time, &thread_user_time);
2296 time_value_add(&thread_total_time, &thread_system_time);
2297 ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);
2298
2299 /* credit/debit/balance/limit are in absolute time units;
2300 the refill info is in nanoseconds. */
2301 absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
2302 if (lei.lei_last_refill > 0) {
2303 usage_percent = (uint32_t)((balance_ns*100ULL) / lei.lei_last_refill);
2304 }
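/*
 * Worked example (illustrative values): with a last refill period of
 * 5 seconds (5,000,000,000 ns) and a balance of 2.5 seconds of CPU time,
 * usage_percent evaluates to (2,500,000,000 * 100) / 5,000,000,000 = 50,
 * i.e. the thread recently consumed roughly 50% of one CPU over the
 * monitored interval.
 */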
2305
2306 /* TODO: show task total runtime (via TASK_ABSOLUTETIME_INFO)? */
2307 printf("process %s[%d] thread %llu caught burning CPU! "
2308 "It used more than %d%% CPU over %u seconds "
2309 "(actual recent usage: %d%% over ~%llu seconds). "
2310 "Thread lifetime cpu usage %d.%06ds, (%d.%06d user, %d.%06d sys) "
2311 "ledger balance: %lld mabs credit: %lld mabs debit: %lld mabs "
2312 "limit: %llu mabs period: %llu ns last refill: %llu ns%s.\n",
2313 procname, pid, tid,
2314 percentage, interval_sec,
2315 usage_percent,
2316 (lei.lei_last_refill + NSEC_PER_SEC/2) / NSEC_PER_SEC,
2317 thread_total_time.seconds, thread_total_time.microseconds,
2318 thread_user_time.seconds, thread_user_time.microseconds,
2319 thread_system_time.seconds, thread_system_time.microseconds,
2320 lei.lei_balance, lei.lei_credit, lei.lei_debit,
2321 lei.lei_limit, lei.lei_refill_period, lei.lei_last_refill,
2322 (fatal ? " [fatal violation]" : ""));
2323
2324 /*
2325 For now, send RESOURCE_NOTIFY in parallel with EXC_RESOURCE. Once
2326 we have logging parity, we will stop sending EXC_RESOURCE (24508922).
2327 */
2328
2329 /* RESOURCE_NOTIFY MIG specifies nanoseconds of CPU time */
2330 lei.lei_balance = balance_ns;
2331 absolutetime_to_nanoseconds(lei.lei_limit, &lei.lei_limit);
2332 trace_resource_violation(RMON_CPUUSAGE_VIOLATED, &lei);
2333 kr = send_resource_violation(send_cpu_usage_violation, task, &lei,
2334 fatal ? kRNFatalLimitFlag : 0);
2335 if (kr) {
2336 printf("send_resource_violation(CPU usage, ...): error %#x\n", kr);
2337 }
2338
2339 #ifdef EXC_RESOURCE_MONITORS
2340 if (send_exc_resource) {
2341 if (disable_exc_resource) {
2342 printf("process %s[%d] thread %llu caught burning CPU! "
2343 "EXC_RESOURCE%s supressed by a boot-arg\n",
2344 procname, pid, tid, fatal ? " (and termination)" : "");
2345 return;
2346 }
2347
2348 if (audio_active) {
2349 printf("process %s[%d] thread %llu caught burning CPU! "
2350 "EXC_RESOURCE & termination supressed due to audio playback\n",
2351 procname, pid, tid);
2352 return;
2353 }
2354 }
2355
2356
2357 if (send_exc_resource) {
2358 code[0] = code[1] = 0;
2359 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU);
2360 if (fatal) {
2361 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR_FATAL);
2362 } else {
2363 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR);
2364 }
2365 EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec);
2366 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], percentage);
2367 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent);
2368 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
2369 }
2370 #endif /* EXC_RESOURCE_MONITORS */
2371
2372 if (fatal) {
2373 #if CONFIG_JETSAM
2374 jetsam_on_ledger_cpulimit_exceeded();
2375 #else
2376 task_terminate_internal(task);
2377 #endif
2378 }
2379 }
2380
2381 void thread_update_io_stats(thread_t thread, int size, int io_flags)
2382 {
2383 int io_tier;
2384
2385 if (thread->thread_io_stats == NULL || thread->task->task_io_stats == NULL)
2386 return;
2387
2388 if (io_flags & DKIO_READ) {
2389 UPDATE_IO_STATS(thread->thread_io_stats->disk_reads, size);
2390 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->disk_reads, size);
2391 }
2392
2393 if (io_flags & DKIO_META) {
2394 UPDATE_IO_STATS(thread->thread_io_stats->metadata, size);
2395 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->metadata, size);
2396 }
2397
2398 if (io_flags & DKIO_PAGING) {
2399 UPDATE_IO_STATS(thread->thread_io_stats->paging, size);
2400 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->paging, size);
2401 }
2402
2403 io_tier = ((io_flags & DKIO_TIER_MASK) >> DKIO_TIER_SHIFT);
2404 assert(io_tier < IO_NUM_PRIORITIES);
2405
2406 UPDATE_IO_STATS(thread->thread_io_stats->io_priority[io_tier], size);
2407 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->io_priority[io_tier], size);
2408
2409 /* Update Total I/O Counts */
2410 UPDATE_IO_STATS(thread->thread_io_stats->total_io, size);
2411 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->total_io, size);
2412
2413 if (!(io_flags & DKIO_READ)) {
2414 DTRACE_IO3(physical_writes, struct task *, thread->task, uint32_t, size, int, io_flags);
2415 ledger_credit(thread->task->ledger, task_ledgers.physical_writes, size);
2416 }
2417 }
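/*
 * Usage sketch (hypothetical call, not from the original source): a storage
 * completion path could account a 16KB metadata read against the current
 * thread and its task as follows; the tier bits are assumed to already be
 * encoded into io_flags by the caller.
 *
 *	thread_update_io_stats(current_thread(), 16 * 1024,
 *	    DKIO_READ | DKIO_META);
 */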
2418
2419 static void
2420 init_thread_ledgers(void) {
2421 ledger_template_t t;
2422 int idx;
2423
2424 assert(thread_ledger_template == NULL);
2425
2426 if ((t = ledger_template_create("Per-thread ledger")) == NULL)
2427 panic("couldn't create thread ledger template");
2428
2429 if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
2430 panic("couldn't create cpu_time entry for thread ledger template");
2431 }
2432
2433 if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) {
2434 panic("couldn't set thread ledger callback for cpu_time entry");
2435 }
2436
2437 thread_ledgers.cpu_time = idx;
2438
2439 ledger_template_complete(t);
2440 thread_ledger_template = t;
2441 }
2442
2443 /*
2444 * Returns currently applied CPU usage limit, or 0/0 if none is applied.
2445 */
2446 int
2447 thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns)
2448 {
2449 int64_t abstime = 0;
2450 uint64_t limittime = 0;
2451 thread_t thread = current_thread();
2452
2453 *percentage = 0;
2454 *interval_ns = 0;
2455 *action = 0;
2456
2457 if (thread->t_threadledger == LEDGER_NULL) {
2458 /*
2459 * This thread has no per-thread ledger, so it can't possibly
2460 * have a CPU limit applied.
2461 */
2462 return (KERN_SUCCESS);
2463 }
2464
2465 ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns);
2466 ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime);
2467
2468 if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) {
2469 /*
2470 * This thread's CPU time ledger has no period or limit; so it
2471 * doesn't have a CPU limit applied.
2472 */
2473 return (KERN_SUCCESS);
2474 }
2475
2476 /*
2477 * This calculation is the converse to the one in thread_set_cpulimit().
2478 */
2479 absolutetime_to_nanoseconds(abstime, &limittime);
2480 *percentage = (limittime * 100ULL) / *interval_ns;
2481 assert(*percentage <= 100);
2482
2483 if (thread->options & TH_OPT_PROC_CPULIMIT) {
2484 assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0);
2485
2486 *action = THREAD_CPULIMIT_BLOCK;
2487 } else if (thread->options & TH_OPT_PRVT_CPULIMIT) {
2488 assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0);
2489
2490 *action = THREAD_CPULIMIT_EXCEPTION;
2491 } else {
2492 *action = THREAD_CPULIMIT_DISABLE;
2493 }
2494
2495 return (KERN_SUCCESS);
2496 }
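/*
 * Usage sketch (mirrors the call made in the CPU-usage warning path above):
 * callers supply storage for all three out-parameters and get 0/0 back when
 * no limit is in force.
 *
 *	int action;
 *	uint8_t percentage;
 *	uint64_t interval_ns;
 *	thread_get_cpulimit(&action, &percentage, &interval_ns);
 */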
2497
2498 /*
2499 * Set CPU usage limit on a thread.
2500 *
2501 * Calling with an action of THREAD_CPULIMIT_DISABLE removes any existing limit for this thread.
2502 */
2503 int
2504 thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
2505 {
2506 thread_t thread = current_thread();
2507 ledger_t l;
2508 uint64_t limittime = 0;
2509 uint64_t abstime = 0;
2510
2511 assert(percentage <= 100);
2512
2513 if (action == THREAD_CPULIMIT_DISABLE) {
2514 /*
2515 * Remove CPU limit, if any exists.
2516 */
2517 if (thread->t_threadledger != LEDGER_NULL) {
2518 l = thread->t_threadledger;
2519 ledger_set_limit(l, thread_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
2520 ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_IGNORE);
2521 thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT);
2522 }
2523
2524 return (0);
2525 }
2526
2527 if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) {
2528 return (KERN_INVALID_ARGUMENT);
2529 }
2530
2531 l = thread->t_threadledger;
2532 if (l == LEDGER_NULL) {
2533 /*
2534 * This thread doesn't yet have a per-thread ledger; so create one with the CPU time entry active.
2535 */
2536 if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL)
2537 return (KERN_RESOURCE_SHORTAGE);
2538
2539 /*
2540 * We are the first to create this thread's ledger, so only activate our entry.
2541 */
2542 ledger_entry_setactive(l, thread_ledgers.cpu_time);
2543 thread->t_threadledger = l;
2544 }
2545
2546 /*
2547 * The limit is specified as a percentage of CPU over an interval in nanoseconds.
2548 * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
2549 */
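/*
 * Worked example (illustrative values): percentage = 50 over an interval_ns
 * of 100ms gives limittime = (100,000,000 * 50) / 100 = 50,000,000 ns, i.e.
 * 50ms of CPU time allowed per refill period.
 */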
2550 limittime = (interval_ns * percentage) / 100;
2551 nanoseconds_to_absolutetime(limittime, &abstime);
2552 ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct);
2553 /*
2554 * Refill the thread's allotted CPU time every interval_ns nanoseconds.
2555 */
2556 ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);
2557
2558 if (action == THREAD_CPULIMIT_EXCEPTION) {
2559 /*
2560 * We don't support programming the CPU usage monitor on a task if any of its
2561 * threads have a per-thread blocking CPU limit configured.
2562 */
2563 if (thread->options & TH_OPT_PRVT_CPULIMIT) {
2564 panic("CPU usage monitor activated, but blocking thread limit exists");
2565 }
2566
2567 /*
2568 * Make a note that this thread's CPU limit is being used for the task-wide CPU
2569 * usage monitor. We don't have to arm the callback which will trigger the
2570 * exception, because that was done for us in ledger_instantiate (because the
2571 * ledger template used has a default callback).
2572 */
2573 thread->options |= TH_OPT_PROC_CPULIMIT;
2574 } else {
2575 /*
2576 * We deliberately override any CPU limit imposed by a task-wide limit (e.g.
2577 * CPU usage monitor).
2578 */
2579 thread->options &= ~TH_OPT_PROC_CPULIMIT;
2580
2581 thread->options |= TH_OPT_PRVT_CPULIMIT;
2582 /* The per-thread ledger template by default has a callback for CPU time */
2583 ledger_disable_callback(l, thread_ledgers.cpu_time);
2584 ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
2585 }
2586
2587 return (0);
2588 }
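/*
 * Usage sketch (hypothetical parameters, not from the original source): a
 * per-thread blocking limit of 50% over a 100ms window could be installed
 * and later removed like this.
 *
 *	thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, 50, 100 * NSEC_PER_MSEC);
 *	...
 *	thread_set_cpulimit(THREAD_CPULIMIT_DISABLE, 0, 0);
 */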
2589
2590 static void
2591 sched_call_null(
2592 __unused int type,
2593 __unused thread_t thread)
2594 {
2595 return;
2596 }
2597
2598 void
2599 thread_sched_call(
2600 thread_t thread,
2601 sched_call_t call)
2602 {
2603 thread->sched_call = (call != NULL)? call: sched_call_null;
2604 }
2605
2606 sched_call_t
2607 thread_disable_sched_call(
2608 thread_t thread,
2609 sched_call_t call)
2610 {
2611 if (call) {
2612 spl_t s = splsched();
2613 thread_lock(thread);
2614 if (thread->sched_call == call) {
2615 thread->sched_call = sched_call_null;
2616 } else {
2617 call = NULL;
2618 }
2619 thread_unlock(thread);
2620 splx(s);
2621 }
2622 return call;
2623 }
2624
2625 void
2626 thread_reenable_sched_call(
2627 thread_t thread,
2628 sched_call_t call)
2629 {
2630 if (call) {
2631 spl_t s = splsched();
2632 thread_lock(thread);
2633 thread_sched_call(thread, call);
2634 thread_unlock(thread);
2635 splx(s);
2636 }
2637 }
2638
2639 void
2640 thread_static_param(
2641 thread_t thread,
2642 boolean_t state)
2643 {
2644 thread_mtx_lock(thread);
2645 thread->static_param = state;
2646 thread_mtx_unlock(thread);
2647 }
2648
2649 uint64_t
2650 thread_tid(
2651 thread_t thread)
2652 {
2653 return (thread != THREAD_NULL? thread->thread_id: 0);
2654 }
2655
2656 uint16_t thread_set_tag(thread_t th, uint16_t tag) {
2657 return thread_set_tag_internal(th, tag);
2658 }
2659 uint16_t thread_get_tag(thread_t th) {
2660 return thread_get_tag_internal(th);
2661 }
2662
2663 uint64_t
2664 thread_dispatchqaddr(
2665 thread_t thread)
2666 {
2667 uint64_t dispatchqueue_addr;
2668 uint64_t thread_handle;
2669
2670 if (thread == THREAD_NULL)
2671 return 0;
2672
2673 thread_handle = thread->machine.cthread_self;
2674 if (thread_handle == 0)
2675 return 0;
2676
2677 if (thread->inspection == TRUE)
2678 dispatchqueue_addr = thread_handle + get_task_dispatchqueue_offset(thread->task);
2679 else if (thread->task->bsd_info)
2680 dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
2681 else
2682 dispatchqueue_addr = 0;
2683
2684 return dispatchqueue_addr;
2685 }
2686
2687 uint64_t
2688 thread_rettokern_addr(
2689 thread_t thread)
2690 {
2691 uint64_t rettokern_addr;
2692 uint64_t rettokern_offset;
2693 uint64_t thread_handle;
2694
2695 if (thread == THREAD_NULL)
2696 return 0;
2697
2698 thread_handle = thread->machine.cthread_self;
2699 if (thread_handle == 0)
2700 return 0;
2701
2702 if (thread->task->bsd_info) {
2703 rettokern_offset = get_return_to_kernel_offset_from_proc(thread->task->bsd_info);
2704
2705 /* Return 0 if return to kernel offset is not initialized. */
2706 if (rettokern_offset == 0) {
2707 rettokern_addr = 0;
2708 } else {
2709 rettokern_addr = thread_handle + rettokern_offset;
2710 }
2711 } else {
2712 rettokern_addr = 0;
2713 }
2714
2715 return rettokern_addr;
2716 }
2717
2718 /*
2719 * Export routines to other components for things that are done as macros
2720 * within the osfmk component.
2721 */
2722
2723 #undef thread_reference
2724 void thread_reference(thread_t thread);
2725 void
2726 thread_reference(
2727 thread_t thread)
2728 {
2729 if (thread != THREAD_NULL)
2730 thread_reference_internal(thread);
2731 }
2732
2733 #undef thread_should_halt
2734
2735 boolean_t
2736 thread_should_halt(
2737 thread_t th)
2738 {
2739 return (thread_should_halt_fast(th));
2740 }
2741
2742 /*
2743 * thread_set_voucher_name - reset the voucher port name bound to this thread
2744 *
2745 * Conditions: nothing locked
2746 *
2747 * If we already converted the previous name to a cached voucher
2748 * reference, then we discard that reference here. The next lookup
2749 * will cache it again.
2750 */
2751
2752 kern_return_t
2753 thread_set_voucher_name(mach_port_name_t voucher_name)
2754 {
2755 thread_t thread = current_thread();
2756 ipc_voucher_t new_voucher = IPC_VOUCHER_NULL;
2757 ipc_voucher_t voucher;
2758 ledger_t bankledger = NULL;
2759 thread_group_t banktg = NULL;
2760
2761 if (MACH_PORT_DEAD == voucher_name)
2762 return KERN_INVALID_RIGHT;
2763
2764 /*
2765 * aggressively convert to voucher reference
2766 */
2767 if (MACH_PORT_VALID(voucher_name)) {
2768 new_voucher = convert_port_name_to_voucher(voucher_name);
2769 if (IPC_VOUCHER_NULL == new_voucher)
2770 return KERN_INVALID_ARGUMENT;
2771 }
2772 bank_get_bank_ledger_and_thread_group(new_voucher, &bankledger, &banktg);
2773
2774 thread_mtx_lock(thread);
2775 voucher = thread->ith_voucher;
2776 thread->ith_voucher_name = voucher_name;
2777 thread->ith_voucher = new_voucher;
2778 thread_mtx_unlock(thread);
2779
2780 bank_swap_thread_bank_ledger(thread, bankledger);
2781
2782 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2783 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
2784 (uintptr_t)thread_tid(thread),
2785 (uintptr_t)voucher_name,
2786 VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
2787 1, 0);
2788
2789 if (IPC_VOUCHER_NULL != voucher)
2790 ipc_voucher_release(voucher);
2791
2792 return KERN_SUCCESS;
2793 }
2794
2795 /*
2796 * thread_get_mach_voucher - return a voucher reference for the specified thread voucher
2797 *
2798 * Conditions: nothing locked
2799 *
2800 * A reference to the voucher may be lazily pending, if someone set the voucher name
2801 * but nobody has done a lookup yet. In that case, we'll have to do the equivalent
2802 * lookup here.
2803 *
2804 * NOTE: At the moment, there is no distinction between the current and effective
2805 * vouchers because we only set them at the thread level currently.
2806 */
2807 kern_return_t
2808 thread_get_mach_voucher(
2809 thread_act_t thread,
2810 mach_voucher_selector_t __unused which,
2811 ipc_voucher_t *voucherp)
2812 {
2813 ipc_voucher_t voucher;
2814 mach_port_name_t voucher_name;
2815
2816 if (THREAD_NULL == thread)
2817 return KERN_INVALID_ARGUMENT;
2818
2819 thread_mtx_lock(thread);
2820 voucher = thread->ith_voucher;
2821
2822 /* if already cached, just return a ref */
2823 if (IPC_VOUCHER_NULL != voucher) {
2824 ipc_voucher_reference(voucher);
2825 thread_mtx_unlock(thread);
2826 *voucherp = voucher;
2827 return KERN_SUCCESS;
2828 }
2829
2830 voucher_name = thread->ith_voucher_name;
2831
2832 /* convert the name to a port, then voucher reference */
2833 if (MACH_PORT_VALID(voucher_name)) {
2834 ipc_port_t port;
2835
2836 if (KERN_SUCCESS !=
2837 ipc_object_copyin(thread->task->itk_space, voucher_name,
2838 MACH_MSG_TYPE_COPY_SEND, (ipc_object_t *)&port)) {
2839 thread->ith_voucher_name = MACH_PORT_NULL;
2840 thread_mtx_unlock(thread);
2841 *voucherp = IPC_VOUCHER_NULL;
2842 return KERN_SUCCESS;
2843 }
2844
2845 /* convert to a voucher ref to return, and cache a ref on thread */
2846 voucher = convert_port_to_voucher(port);
2847 ipc_voucher_reference(voucher);
2848 thread->ith_voucher = voucher;
2849 thread_mtx_unlock(thread);
2850
2851 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2852 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
2853 (uintptr_t)thread_tid(thread),
2854 (uintptr_t)port,
2855 VM_KERNEL_ADDRPERM((uintptr_t)voucher),
2856 2, 0);
2857
2858
2859 ipc_port_release_send(port);
2860 } else
2861 thread_mtx_unlock(thread);
2862
2863 *voucherp = voucher;
2864 return KERN_SUCCESS;
2865 }
2866
2867 /*
2868 * thread_set_mach_voucher - set a voucher reference for the specified thread voucher
2869 *
2870 * Conditions: the caller holds a reference on the voucher.
2871 * nothing locked.
2872 *
2873 * We grab another reference to the voucher and bind it to the thread. Any lazy
2874 * binding is erased. The old voucher reference associated with the thread is
2875 * discarded.
2876 */
2877 kern_return_t
2878 thread_set_mach_voucher(
2879 thread_t thread,
2880 ipc_voucher_t voucher)
2881 {
2882 ipc_voucher_t old_voucher;
2883 ledger_t bankledger = NULL;
2884 thread_group_t banktg = NULL;
2885
2886 if (THREAD_NULL == thread)
2887 return KERN_INVALID_ARGUMENT;
2888
2889 if (thread != current_thread() && thread->started)
2890 return KERN_INVALID_ARGUMENT;
2891
2892 ipc_voucher_reference(voucher);
2893 bank_get_bank_ledger_and_thread_group(voucher, &bankledger, &banktg);
2894
2895 thread_mtx_lock(thread);
2896 old_voucher = thread->ith_voucher;
2897 thread->ith_voucher = voucher;
2898 thread->ith_voucher_name = MACH_PORT_NULL;
2899 thread_mtx_unlock(thread);
2900
2901 bank_swap_thread_bank_ledger(thread, bankledger);
2902
2903 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2904 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
2905 (uintptr_t)thread_tid(thread),
2906 (uintptr_t)MACH_PORT_NULL,
2907 VM_KERNEL_ADDRPERM((uintptr_t)voucher),
2908 3, 0);
2909
2910 ipc_voucher_release(old_voucher);
2911
2912 return KERN_SUCCESS;
2913 }
2914
2915 /*
2916 * thread_swap_mach_voucher - swap a voucher reference for the specified thread voucher
2917 *
2918 * Conditions: the caller holds a reference on the new and presumed old voucher(s).
2919 * nothing locked.
2920 *
2921 * If the old voucher is still the same as passed in, replace it with new voucher
2922 * and discard the old (and the reference passed in). Otherwise, discard the new
2923 * and return an updated old voucher.
2924 */
2925 kern_return_t
2926 thread_swap_mach_voucher(
2927 thread_t thread,
2928 ipc_voucher_t new_voucher,
2929 ipc_voucher_t *in_out_old_voucher)
2930 {
2931 mach_port_name_t old_voucher_name;
2932 ipc_voucher_t old_voucher;
2933 ledger_t bankledger = NULL;
2934 thread_group_t banktg = NULL;
2935
2936 if (THREAD_NULL == thread)
2937 return KERN_INVALID_TASK;
2938
2939 if (thread != current_thread() && thread->started)
2940 return KERN_INVALID_ARGUMENT;
2941
2942 bank_get_bank_ledger_and_thread_group(new_voucher, &bankledger, &banktg);
2943
2944 thread_mtx_lock(thread);
2945
2946 old_voucher = thread->ith_voucher;
2947
2948 if (IPC_VOUCHER_NULL == old_voucher) {
2949 old_voucher_name = thread->ith_voucher_name;
2950
2951 /* perform lazy binding if needed */
2952 if (MACH_PORT_VALID(old_voucher_name)) {
2953 old_voucher = convert_port_name_to_voucher(old_voucher_name);
2954 thread->ith_voucher_name = MACH_PORT_NULL;
2955 thread->ith_voucher = old_voucher;
2956
2957 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2958 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
2959 (uintptr_t)thread_tid(thread),
2960 (uintptr_t)old_voucher_name,
2961 VM_KERNEL_ADDRPERM((uintptr_t)old_voucher),
2962 4, 0);
2963
2964 }
2965 }
2966
2967 /* swap in new voucher, if old voucher matches the one supplied */
2968 if (old_voucher == *in_out_old_voucher) {
2969 ipc_voucher_reference(new_voucher);
2970 thread->ith_voucher = new_voucher;
2971 thread->ith_voucher_name = MACH_PORT_NULL;
2972 thread_mtx_unlock(thread);
2973 bank_swap_thread_bank_ledger(thread, bankledger);
2974
2975 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2976 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
2977 (uintptr_t)thread_tid(thread),
2978 (uintptr_t)MACH_PORT_NULL,
2979 VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
2980 5, 0);
2981
2982 ipc_voucher_release(old_voucher);
2983
2984 *in_out_old_voucher = IPC_VOUCHER_NULL;
2985 return KERN_SUCCESS;
2986 }
2987
2988 /* Otherwise, just return old voucher reference */
2989 ipc_voucher_reference(old_voucher);
2990 thread_mtx_unlock(thread);
2991 *in_out_old_voucher = old_voucher;
2992 return KERN_SUCCESS;
2993 }
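/*
 * Usage sketch (hypothetical caller, not from the original source): the
 * compare-and-swap style contract described above, presuming the thread has
 * no voucher yet.
 *
 *	ipc_voucher_t presumed_old = IPC_VOUCHER_NULL;
 *	kr = thread_swap_mach_voucher(thread, new_voucher, &presumed_old);
 *	... if presumed_old comes back non-NULL, the swap did not occur and
 *	    the caller now holds a reference to the thread's actual voucher ...
 */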
2994
2995 /*
2996 * thread_get_current_voucher_origin_pid - get the pid of the originator of the current voucher.
2997 */
2998 kern_return_t
2999 thread_get_current_voucher_origin_pid(
3000 int32_t *pid)
3001 {
3002 uint32_t buf_size;
3003 kern_return_t kr;
3004 thread_t thread = current_thread();
3005
3006 buf_size = sizeof(*pid);
3007 kr = mach_voucher_attr_command(thread->ith_voucher,
3008 MACH_VOUCHER_ATTR_KEY_BANK,
3009 BANK_ORIGINATOR_PID,
3010 NULL,
3011 0,
3012 (mach_voucher_attr_content_t)pid,
3013 &buf_size);
3014
3015 return kr;
3016 }
3017
3018
3019 boolean_t
3020 thread_has_thread_name(thread_t th)
3021 {
3022 if ((th) && (th->uthread)) {
3023 return bsd_hasthreadname(th->uthread);
3024 }
3025
3026 /*
3027 * This is an odd case; clients may set the thread name based on the lack of
3028 * a name, but in this context there is no uthread to attach the name to.
3029 */
3030 return FALSE;
3031 }
3032
3033 void
3034 thread_set_thread_name(thread_t th, const char* name)
3035 {
3036 if ((th) && (th->uthread) && name) {
3037 bsd_setthreadname(th->uthread, name);
3038 }
3039 }
3040
3041 /*
3042 * thread_enable_send_importance - set/clear the SEND_IMPORTANCE thread option bit.
3043 */
3044 void thread_enable_send_importance(thread_t thread, boolean_t enable)
3045 {
3046 if (enable == TRUE)
3047 thread->options |= TH_OPT_SEND_IMPORTANCE;
3048 else
3049 thread->options &= ~TH_OPT_SEND_IMPORTANCE;
3050 }
3051
3052 /*
3053 * thread_set_allocation_name - set the current thread's allocation name, returning the previous value.
3054 */
3055
3056 kern_allocation_name_t thread_set_allocation_name(kern_allocation_name_t new_name)
3057 {
3058 kern_allocation_name_t ret;
3059 thread_kernel_state_t kstate = thread_get_kernel_state(current_thread());
3060 ret = kstate->allocation_name;
3061 // FIFO: install only when clearing, or when no name is currently set (first setter wins)
3062 if (!new_name || !kstate->allocation_name) kstate->allocation_name = new_name;
3063 return ret;
3064 }
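/*
 * Usage sketch (pattern implied by the first-setter-wins behavior above): a
 * caller typically saves and restores the previous name around the
 * allocations it wants attributed; `site` is a hypothetical name.
 *
 *	kern_allocation_name_t prev = thread_set_allocation_name(site);
 *	... allocations made here are attributed to site (unless an outer
 *	    name was already installed) ...
 *	thread_set_allocation_name(prev);
 */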
3065
3066 #if CONFIG_DTRACE
3067 uint32_t dtrace_get_thread_predcache(thread_t thread)
3068 {
3069 if (thread != THREAD_NULL)
3070 return thread->t_dtrace_predcache;
3071 else
3072 return 0;
3073 }
3074
3075 int64_t dtrace_get_thread_vtime(thread_t thread)
3076 {
3077 if (thread != THREAD_NULL)
3078 return thread->t_dtrace_vtime;
3079 else
3080 return 0;
3081 }
3082
3083 int dtrace_get_thread_last_cpu_id(thread_t thread)
3084 {
3085 if ((thread != THREAD_NULL) && (thread->last_processor != PROCESSOR_NULL)) {
3086 return thread->last_processor->cpu_id;
3087 } else {
3088 return -1;
3089 }
3090 }
3091
3092 int64_t dtrace_get_thread_tracing(thread_t thread)
3093 {
3094 if (thread != THREAD_NULL)
3095 return thread->t_dtrace_tracing;
3096 else
3097 return 0;
3098 }
3099
3100 boolean_t dtrace_get_thread_reentering(thread_t thread)
3101 {
3102 if (thread != THREAD_NULL)
3103 return (thread->options & TH_OPT_DTRACE) ? TRUE : FALSE;
3104 else
3105 return 0;
3106 }
3107
3108 vm_offset_t dtrace_get_kernel_stack(thread_t thread)
3109 {
3110 if (thread != THREAD_NULL)
3111 return thread->kernel_stack;
3112 else
3113 return 0;
3114 }
3115
3116 #if KASAN
3117 struct kasan_thread_data *
3118 kasan_get_thread_data(thread_t thread)
3119 {
3120 return &thread->kasan_data;
3121 }
3122 #endif
3123
3124 int64_t dtrace_calc_thread_recent_vtime(thread_t thread)
3125 {
3126 if (thread != THREAD_NULL) {
3127 processor_t processor = current_processor();
3128 uint64_t abstime = mach_absolute_time();
3129 timer_t timer;
3130
3131 timer = PROCESSOR_DATA(processor, thread_timer);
3132
3133 return timer_grab(&(thread->system_timer)) + timer_grab(&(thread->user_timer)) +
3134 (abstime - timer->tstamp); /* XXX need interrupts off to prevent missed time? */
3135 } else
3136 return 0;
3137 }
3138
3139 void dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
3140 {
3141 if (thread != THREAD_NULL)
3142 thread->t_dtrace_predcache = predcache;
3143 }
3144
3145 void dtrace_set_thread_vtime(thread_t thread, int64_t vtime)
3146 {
3147 if (thread != THREAD_NULL)
3148 thread->t_dtrace_vtime = vtime;
3149 }
3150
3151 void dtrace_set_thread_tracing(thread_t thread, int64_t accum)
3152 {
3153 if (thread != THREAD_NULL)
3154 thread->t_dtrace_tracing = accum;
3155 }
3156
3157 void dtrace_set_thread_reentering(thread_t thread, boolean_t vbool)
3158 {
3159 if (thread != THREAD_NULL) {
3160 if (vbool)
3161 thread->options |= TH_OPT_DTRACE;
3162 else
3163 thread->options &= (~TH_OPT_DTRACE);
3164 }
3165 }
3166
3167 vm_offset_t dtrace_set_thread_recover(thread_t thread, vm_offset_t recover)
3168 {
3169 vm_offset_t prev = 0;
3170
3171 if (thread != THREAD_NULL) {
3172 prev = thread->recover;
3173 thread->recover = recover;
3174 }
3175 return prev;
3176 }
3177
3178 void dtrace_thread_bootstrap(void)
3179 {
3180 task_t task = current_task();
3181
3182 if (task->thread_count == 1) {
3183 thread_t thread = current_thread();
3184 if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) {
3185 thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS;
3186 DTRACE_PROC(exec__success);
3187 KDBG(BSDDBG_CODE(DBG_BSD_PROC,BSD_PROC_EXEC),
3188 task_pid(task));
3189 }
3190 DTRACE_PROC(start);
3191 }
3192 DTRACE_PROC(lwp__start);
3193
3194 }
3195
3196 void
3197 dtrace_thread_didexec(thread_t thread)
3198 {
3199 thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS;
3200 }
3201 #endif /* CONFIG_DTRACE */