1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: kern/thread.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub
61 * Date: 1986
62 *
63 * Thread management primitives implementation.
64 */
65 /*
66 * Copyright (c) 1993 The University of Utah and
67 * the Computer Systems Laboratory (CSL). All rights reserved.
68 *
69 * Permission to use, copy, modify and distribute this software and its
70 * documentation is hereby granted, provided that both the copyright
71 * notice and this permission notice appear in all copies of the
72 * software, derivative works or modified versions, and any portions
73 * thereof, and that both notices appear in supporting documentation.
74 *
75 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
76 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
77 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
78 *
79 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
80 * improvements that they make and grant CSL redistribution rights.
81 *
82 */
83
84 #include <mach/mach_types.h>
85 #include <mach/boolean.h>
86 #include <mach/policy.h>
87 #include <mach/thread_info.h>
88 #include <mach/thread_special_ports.h>
89 #include <mach/thread_status.h>
90 #include <mach/time_value.h>
91 #include <mach/vm_param.h>
92
93 #include <machine/thread.h>
94 #include <machine/pal_routines.h>
95 #include <machine/limits.h>
96
97 #include <kern/kern_types.h>
98 #include <kern/kalloc.h>
99 #include <kern/cpu_data.h>
100 #include <kern/counters.h>
101 #include <kern/extmod_statistics.h>
102 #include <kern/ipc_mig.h>
103 #include <kern/ipc_tt.h>
104 #include <kern/mach_param.h>
105 #include <kern/machine.h>
106 #include <kern/misc_protos.h>
107 #include <kern/processor.h>
108 #include <kern/queue.h>
109 #include <kern/sched.h>
110 #include <kern/sched_prim.h>
111 #include <kern/sync_lock.h>
112 #include <kern/syscall_subr.h>
113 #include <kern/task.h>
114 #include <kern/thread.h>
115 #include <kern/thread_group.h>
116 #include <kern/coalition.h>
117 #include <kern/host.h>
118 #include <kern/zalloc.h>
119 #include <kern/assert.h>
120 #include <kern/exc_resource.h>
121 #include <kern/exc_guard.h>
122 #include <kern/telemetry.h>
123 #include <kern/policy_internal.h>
124
125 #include <corpses/task_corpse.h>
126 #if KPC
127 #include <kern/kpc.h>
128 #endif
129
130 #if MONOTONIC
131 #include <kern/monotonic.h>
132 #include <machine/monotonic.h>
133 #endif /* MONOTONIC */
134
135 #include <ipc/ipc_kmsg.h>
136 #include <ipc/ipc_port.h>
137 #include <bank/bank_types.h>
138
139 #include <vm/vm_kern.h>
140 #include <vm/vm_pageout.h>
141
142 #include <sys/kdebug.h>
143 #include <sys/bsdtask_info.h>
144 #include <mach/sdt.h>
145 #include <san/kasan.h>
146
147 #include <stdatomic.h>
148
149 /*
150 * Exported interfaces
151 */
152 #include <mach/task_server.h>
153 #include <mach/thread_act_server.h>
154 #include <mach/mach_host_server.h>
155 #include <mach/host_priv_server.h>
156 #include <mach/mach_voucher_server.h>
157 #include <kern/policy_internal.h>
158
159 static struct zone *thread_zone;
160 static lck_grp_attr_t thread_lck_grp_attr;
161 lck_attr_t thread_lck_attr;
162 lck_grp_t thread_lck_grp;
163
164 struct zone *thread_qos_override_zone;
165
166 decl_simple_lock_data(static,thread_stack_lock)
167 static queue_head_t thread_stack_queue;
168
169 decl_simple_lock_data(static,thread_terminate_lock)
170 static queue_head_t thread_terminate_queue;
171
172 static queue_head_t crashed_threads_queue;
173
174 decl_simple_lock_data(static,thread_exception_lock)
175 static queue_head_t thread_exception_queue;
176
177 struct thread_exception_elt {
178 queue_chain_t elt;
179 exception_type_t exception_type;
180 task_t exception_task;
181 thread_t exception_thread;
182 };
183
184 static struct thread thread_template, init_thread;
185
186 static void sched_call_null(
187 int type,
188 thread_t thread);
189
190 #ifdef MACH_BSD
191 extern void proc_exit(void *);
192 extern mach_exception_data_type_t proc_encode_exit_exception_code(void *);
193 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
194 extern uint64_t get_return_to_kernel_offset_from_proc(void *p);
195 extern int proc_selfpid(void);
196 extern char * proc_name_address(void *p);
197 #endif /* MACH_BSD */
198
199 extern int disable_exc_resource;
200 extern int audio_active;
201 extern int debug_task;
202 int thread_max = CONFIG_THREAD_MAX; /* Max number of threads */
203 int task_threadmax = CONFIG_THREAD_MAX;
204
205 static uint64_t thread_unique_id = 100;
206
207 struct _thread_ledger_indices thread_ledgers = { -1 };
208 static ledger_template_t thread_ledger_template = NULL;
209 static void init_thread_ledgers(void);
210
211 #if CONFIG_JETSAM
212 void jetsam_on_ledger_cpulimit_exceeded(void);
213 #endif
214
215 /*
216 * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry.
217 *
218  * (i.e., when any thread's CPU consumption exceeds 70% of the limit, start taking user
219 * stacktraces, aka micro-stackshots)
220 */
221 #define CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70
222
223 int cpumon_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
224 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void);
225
226 /*
227 * The smallest interval over which we support limiting CPU consumption is 1ms
228 */
229 #define MINIMUM_CPULIMIT_INTERVAL_MS 1
230
231 void
232 thread_bootstrap(void)
233 {
234 /*
235 * Fill in a template thread for fast initialization.
236 */
237
238 #if MACH_ASSERT
239 thread_template.thread_magic = THREAD_MAGIC;
240 #endif /* MACH_ASSERT */
241
242 thread_template.runq = PROCESSOR_NULL;
243
244 thread_template.ref_count = 2;
245
246 thread_template.reason = AST_NONE;
247 thread_template.at_safe_point = FALSE;
248 thread_template.wait_event = NO_EVENT64;
249 thread_template.waitq = NULL;
250 thread_template.wait_result = THREAD_WAITING;
251 thread_template.options = THREAD_ABORTSAFE;
252 thread_template.state = TH_WAIT | TH_UNINT;
253 thread_template.wake_active = FALSE;
254 thread_template.continuation = THREAD_CONTINUE_NULL;
255 thread_template.parameter = NULL;
256
257 thread_template.importance = 0;
258 thread_template.sched_mode = TH_MODE_NONE;
259 thread_template.sched_flags = 0;
260 thread_template.saved_mode = TH_MODE_NONE;
261 thread_template.safe_release = 0;
262 thread_template.th_sched_bucket = TH_BUCKET_RUN;
263
264 thread_template.sfi_class = SFI_CLASS_UNSPECIFIED;
265 thread_template.sfi_wait_class = SFI_CLASS_UNSPECIFIED;
266
267 thread_template.active = 0;
268 thread_template.started = 0;
269 thread_template.static_param = 0;
270 thread_template.policy_reset = 0;
271
272 thread_template.base_pri = BASEPRI_DEFAULT;
273 thread_template.sched_pri = 0;
274 thread_template.max_priority = 0;
275 thread_template.task_priority = 0;
276 thread_template.promotions = 0;
277 thread_template.pending_promoter_index = 0;
278 thread_template.pending_promoter[0] = NULL;
279 thread_template.pending_promoter[1] = NULL;
280 thread_template.rwlock_count = 0;
281
282
283 thread_template.realtime.deadline = UINT64_MAX;
284
285 thread_template.quantum_remaining = 0;
286 thread_template.last_run_time = 0;
287 thread_template.last_made_runnable_time = THREAD_NOT_RUNNABLE;
288 thread_template.last_basepri_change_time = THREAD_NOT_RUNNABLE;
289 thread_template.same_pri_latency = 0;
290
291 thread_template.computation_metered = 0;
292 thread_template.computation_epoch = 0;
293
294 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
295 thread_template.sched_stamp = 0;
296 thread_template.pri_shift = INT8_MAX;
297 thread_template.sched_usage = 0;
298 thread_template.cpu_usage = thread_template.cpu_delta = 0;
299 #endif
300 thread_template.c_switch = thread_template.p_switch = thread_template.ps_switch = 0;
301
302 #if MONOTONIC
303 memset(&thread_template.t_monotonic, 0,
304 sizeof(thread_template.t_monotonic));
305 #endif /* MONOTONIC */
306
307 thread_template.bound_processor = PROCESSOR_NULL;
308 thread_template.last_processor = PROCESSOR_NULL;
309
310 thread_template.sched_call = sched_call_null;
311
312 timer_init(&thread_template.user_timer);
313 timer_init(&thread_template.system_timer);
314 timer_init(&thread_template.ptime);
315 thread_template.user_timer_save = 0;
316 thread_template.system_timer_save = 0;
317 thread_template.vtimer_user_save = 0;
318 thread_template.vtimer_prof_save = 0;
319 thread_template.vtimer_rlim_save = 0;
320 thread_template.vtimer_qos_save = 0;
321
322 #if CONFIG_SCHED_SFI
323 thread_template.wait_sfi_begin_time = 0;
324 #endif
325
326 thread_template.wait_timer_is_set = FALSE;
327 thread_template.wait_timer_active = 0;
328
329 thread_template.depress_timer_active = 0;
330
331 thread_template.recover = (vm_offset_t)NULL;
332
333 thread_template.map = VM_MAP_NULL;
334
335 #if CONFIG_DTRACE
336 thread_template.t_dtrace_predcache = 0;
337 thread_template.t_dtrace_vtime = 0;
338 thread_template.t_dtrace_tracing = 0;
339 #endif /* CONFIG_DTRACE */
340
341 #if KPERF
342 thread_template.kperf_flags = 0;
343 thread_template.kperf_pet_gen = 0;
344 thread_template.kperf_c_switch = 0;
345 thread_template.kperf_pet_cnt = 0;
346 #endif
347
348 #if KPC
349 thread_template.kpc_buf = NULL;
350 #endif
351
352 #if HYPERVISOR
353 thread_template.hv_thread_target = NULL;
354 #endif /* HYPERVISOR */
355
356 #if (DEVELOPMENT || DEBUG)
357 thread_template.t_page_creation_throttled_hard = 0;
358 thread_template.t_page_creation_throttled_soft = 0;
359 #endif /* DEVELOPMENT || DEBUG */
360 thread_template.t_page_creation_throttled = 0;
361 thread_template.t_page_creation_count = 0;
362 thread_template.t_page_creation_time = 0;
363
364 thread_template.affinity_set = NULL;
365
366 thread_template.syscalls_unix = 0;
367 thread_template.syscalls_mach = 0;
368
369 thread_template.t_ledger = LEDGER_NULL;
370 thread_template.t_threadledger = LEDGER_NULL;
371 thread_template.t_bankledger = LEDGER_NULL;
372 thread_template.t_deduct_bank_ledger_time = 0;
373
374 thread_template.requested_policy = (struct thread_requested_policy) {};
375 thread_template.effective_policy = (struct thread_effective_policy) {};
376
377 bzero(&thread_template.overrides, sizeof(thread_template.overrides));
378 thread_template.sync_ipc_overrides = 0;
379
380 thread_template.iotier_override = THROTTLE_LEVEL_NONE;
381 thread_template.thread_io_stats = NULL;
382 #if CONFIG_EMBEDDED
383 thread_template.taskwatch = NULL;
384 #endif /* CONFIG_EMBEDDED */
385 thread_template.thread_callout_interrupt_wakeups = thread_template.thread_callout_platform_idle_wakeups = 0;
386
387 thread_template.thread_timer_wakeups_bin_1 = thread_template.thread_timer_wakeups_bin_2 = 0;
388 thread_template.callout_woken_from_icontext = thread_template.callout_woken_from_platform_idle = 0;
389
390 thread_template.thread_tag = 0;
391
392 thread_template.ith_voucher_name = MACH_PORT_NULL;
393 thread_template.ith_voucher = IPC_VOUCHER_NULL;
394
395 thread_template.th_work_interval = NULL;
396
397 init_thread = thread_template;
398 machine_set_current_thread(&init_thread);
399 }
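/*
 * Descriptive note: the thread_template filled in above is consumed by
 * thread_create_internal() below, which initializes each new thread by
 * structure assignment ("*new_thread = thread_template") before fixing
 * up its per-thread state.
 */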
400
401 extern boolean_t allow_qos_policy_set;
402
403 void
404 thread_init(void)
405 {
406 thread_zone = zinit(
407 sizeof(struct thread),
408 thread_max * sizeof(struct thread),
409 THREAD_CHUNK * sizeof(struct thread),
410 "threads");
411
412 thread_qos_override_zone = zinit(
413 sizeof(struct thread_qos_override),
414 4 * thread_max * sizeof(struct thread_qos_override),
415 PAGE_SIZE,
416 "thread qos override");
417 zone_change(thread_qos_override_zone, Z_EXPAND, TRUE);
418 zone_change(thread_qos_override_zone, Z_COLLECT, TRUE);
419 zone_change(thread_qos_override_zone, Z_CALLERACCT, FALSE);
420 zone_change(thread_qos_override_zone, Z_NOENCRYPT, TRUE);
421
422 lck_grp_attr_setdefault(&thread_lck_grp_attr);
423 lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr);
424 lck_attr_setdefault(&thread_lck_attr);
425
426 stack_init();
427
428 thread_policy_init();
429
430 /*
431 * Initialize any machine-dependent
432 * per-thread structures necessary.
433 */
434 machine_thread_init();
435
436 if (!PE_parse_boot_argn("cpumon_ustackshots_trigger_pct", &cpumon_ustackshots_trigger_pct,
437 sizeof (cpumon_ustackshots_trigger_pct))) {
438 cpumon_ustackshots_trigger_pct = CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT;
439 }
440
441 PE_parse_boot_argn("-qos-policy-allow", &allow_qos_policy_set, sizeof(allow_qos_policy_set));
442
443 init_thread_ledgers();
444 }
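/*
 * Hedged example (illustrative values, not defaults): the two boot-args
 * consulted above can be supplied at boot time, e.g.
 *
 *	cpumon_ustackshots_trigger_pct=50 -qos-policy-allow
 *
 * to lower the CPU usage monitor telemetry trigger from its 70% default
 * and to allow QoS policy sets, respectively.
 */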
445
446 boolean_t
447 thread_is_active(thread_t thread)
448 {
449 return (thread->active);
450 }
451
452 void
453 thread_corpse_continue(void)
454 {
455 thread_t thread = current_thread();
456
457 thread_terminate_internal(thread);
458
459 /*
460 * Handle the thread termination directly
461 * here instead of returning to userspace.
462 */
463 assert(thread->active == FALSE);
464 thread_ast_clear(thread, AST_APC);
465 thread_apc_ast(thread);
466
467 panic("thread_corpse_continue");
468 /*NOTREACHED*/
469 }
470
471 static void
472 thread_terminate_continue(void)
473 {
474 panic("thread_terminate_continue");
475 /*NOTREACHED*/
476 }
477
478 /*
479 * thread_terminate_self:
480 */
481 void
482 thread_terminate_self(void)
483 {
484 thread_t thread = current_thread();
485 task_t task;
486 spl_t s;
487 int threadcnt;
488
489 pal_thread_terminate_self(thread);
490
491 DTRACE_PROC(lwp__exit);
492
493 thread_mtx_lock(thread);
494
495 ipc_thread_disable(thread);
496
497 thread_mtx_unlock(thread);
498
499 s = splsched();
500 thread_lock(thread);
501
502 /*
503 * Cancel priority depression, wait for concurrent expirations
504 * on other processors.
505 */
506 if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
507 thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
508
509 /* If our priority was low because of a depressed yield, restore it in case we block below */
510 thread_recompute_sched_pri(thread, FALSE);
511
512 if (timer_call_cancel(&thread->depress_timer))
513 thread->depress_timer_active--;
514 }
515
516 while (thread->depress_timer_active > 0) {
517 thread_unlock(thread);
518 splx(s);
519
520 delay(1);
521
522 s = splsched();
523 thread_lock(thread);
524 }
525
526 thread_sched_call(thread, NULL);
527
528 thread_unlock(thread);
529 splx(s);
530
531 #if CONFIG_EMBEDDED
532 thead_remove_taskwatch(thread);
533 #endif /* CONFIG_EMBEDDED */
534
535 work_interval_thread_terminate(thread);
536
537 thread_mtx_lock(thread);
538
539 thread_policy_reset(thread);
540
541 thread_mtx_unlock(thread);
542
543 bank_swap_thread_bank_ledger(thread, NULL);
544
545 if (kdebug_enable && bsd_hasthreadname(thread->uthread)) {
546 char threadname[MAXTHREADNAMESIZE];
547 bsd_getthreadname(thread->uthread, threadname);
548 kernel_debug_string_simple(TRACE_STRING_THREADNAME_PREV, threadname);
549 }
550
551 task = thread->task;
552 uthread_cleanup(task, thread->uthread, task->bsd_info);
553
554 if (kdebug_enable && task->bsd_info && !task_is_exec_copy(task)) {
555 /* trace out pid before we sign off */
556 long dbg_arg1 = 0;
557 long dbg_arg2 = 0;
558
559 kdbg_trace_data(thread->task->bsd_info, &dbg_arg1, &dbg_arg2);
560 KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE_PID, dbg_arg1, dbg_arg2);
561 }
562
563 /*
564 * After this subtraction, this thread should never access
565 * task->bsd_info unless it got 0 back from the hw_atomic_sub. It
566 * could be racing with other threads to be the last thread in the
567 * process, and the last thread in the process will tear down the proc
568 * structure and zero-out task->bsd_info.
569 */
570 threadcnt = hw_atomic_sub(&task->active_thread_count, 1);
571
572 /*
573 * If we are the last thread to terminate and the task is
574 * associated with a BSD process, perform BSD process exit.
575 */
576 if (threadcnt == 0 && task->bsd_info != NULL && !task_is_exec_copy(task)) {
577 mach_exception_data_type_t subcode = 0;
578 if (kdebug_enable) {
579 /* since we're the last thread in this process, trace out the command name too */
580 long args[4] = {};
581 kdbg_trace_string(thread->task->bsd_info, &args[0], &args[1], &args[2], &args[3]);
582 KDBG_RELEASE(TRACE_STRING_PROC_EXIT, args[0], args[1], args[2], args[3]);
583 }
584
585 /* Get the exit reason before proc_exit */
586 subcode = proc_encode_exit_exception_code(task->bsd_info);
587 proc_exit(task->bsd_info);
588 /*
589 	 * If the task has crash info, deliver the crash
590 	 * notification now, since this is the last thread
591 	 * for this task.
592 */
593 if (task->corpse_info) {
594 task_deliver_crash_notification(task, current_thread(), EXC_RESOURCE, subcode);
595 }
596 }
597
598 if (threadcnt == 0) {
599 task_lock(task);
600 if (task_is_a_corpse_fork(task)) {
601 thread_wakeup((event_t)&task->active_thread_count);
602 }
603 task_unlock(task);
604 }
605
606 uthread_cred_free(thread->uthread);
607
608 s = splsched();
609 thread_lock(thread);
610
611 /*
612 * Cancel wait timer, and wait for
613 * concurrent expirations.
614 */
615 if (thread->wait_timer_is_set) {
616 thread->wait_timer_is_set = FALSE;
617
618 if (timer_call_cancel(&thread->wait_timer))
619 thread->wait_timer_active--;
620 }
621
622 while (thread->wait_timer_active > 0) {
623 thread_unlock(thread);
624 splx(s);
625
626 delay(1);
627
628 s = splsched();
629 thread_lock(thread);
630 }
631
632 /*
633 * If there is a reserved stack, release it.
634 */
635 if (thread->reserved_stack != 0) {
636 stack_free_reserved(thread);
637 thread->reserved_stack = 0;
638 }
639
640 /*
641 * Mark thread as terminating, and block.
642 */
643 thread->state |= TH_TERMINATE;
644 thread_mark_wait_locked(thread, THREAD_UNINT);
645 assert((thread->sched_flags & TH_SFLAG_PROMOTED) == 0);
646 assert(thread->promotions == 0);
647 assert(!(thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED));
648 assert(thread->rwlock_count == 0);
649 thread_unlock(thread);
650 /* splsched */
651
652 thread_block((thread_continue_t)thread_terminate_continue);
653 /*NOTREACHED*/
654 }
655
656 /* Drop a thread refcount that definitely isn't the last one. */
657 void
658 thread_deallocate_safe(thread_t thread)
659 {
660 assert_thread_magic(thread);
661
662 uint32_t old_refcount = atomic_fetch_sub_explicit(&thread->ref_count, 1, memory_order_release);
663
664 if (__improbable(old_refcount <= 1))
665 panic("bad thread refcount: %d", old_refcount);
666 }
667
668 void
669 thread_deallocate(
670 thread_t thread)
671 {
672 task_t task;
673 __assert_only uint32_t th_ref_count;
674
675 if (thread == THREAD_NULL)
676 return;
677
678 assert_thread_magic(thread);
679
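	/*
	 * Descriptive note: the reference is dropped with release semantics;
	 * only the caller that drops the last reference falls through below.
	 * The acquire load that follows pairs with the release decrements
	 * performed by other droppers, so their prior writes to the thread
	 * are visible before the thread is torn down.
	 */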
680 if (__probable(atomic_fetch_sub_explicit(&thread->ref_count, 1,
681 memory_order_release) - 1 > 0)) {
682 return;
683 }
684
685 th_ref_count = atomic_load_explicit(&thread->ref_count, memory_order_acquire);
686 assert(th_ref_count == 0);
687
688 assert(thread_owned_workloops_count(thread) == 0);
689
690 if (!(thread->state & TH_TERMINATE2))
691 panic("thread_deallocate: thread not properly terminated\n");
692
693 assert(thread->runq == PROCESSOR_NULL);
694
695 assert(thread->user_promotions == 0);
696
697 #if KPC
698 kpc_thread_destroy(thread);
699 #endif
700
701 ipc_thread_terminate(thread);
702
703 proc_thread_qos_deallocate(thread);
704
705 task = thread->task;
706
707 #ifdef MACH_BSD
708 {
709 void *ut = thread->uthread;
710
711 thread->uthread = NULL;
712 uthread_zone_free(ut);
713 }
714 #endif /* MACH_BSD */
715
716 if (thread->t_ledger)
717 ledger_dereference(thread->t_ledger);
718 if (thread->t_threadledger)
719 ledger_dereference(thread->t_threadledger);
720
721 if (IPC_VOUCHER_NULL != thread->ith_voucher)
722 ipc_voucher_release(thread->ith_voucher);
723
724 if (thread->thread_io_stats)
725 kfree(thread->thread_io_stats, sizeof(struct io_stat_info));
726
727 if (thread->kernel_stack != 0)
728 stack_free(thread);
729
730 lck_mtx_destroy(&thread->mutex, &thread_lck_grp);
731 machine_thread_destroy(thread);
732
733 task_deallocate(task);
734
735 #if MACH_ASSERT
736 assert_thread_magic(thread);
737 thread->thread_magic = 0;
738 #endif /* MACH_ASSERT */
739
740 zfree(thread_zone, thread);
741 }
742
743 void
744 thread_starts_owning_workloop(thread_t thread)
745 {
746 atomic_fetch_add_explicit(&thread->kqwl_owning_count, 1,
747 memory_order_relaxed);
748 }
749
750 void
751 thread_ends_owning_workloop(thread_t thread)
752 {
753 __assert_only uint32_t count;
754 count = atomic_fetch_sub_explicit(&thread->kqwl_owning_count, 1,
755 memory_order_relaxed);
756 assert(count > 0);
757 }
758
759 uint32_t
760 thread_owned_workloops_count(thread_t thread)
761 {
762 return atomic_load_explicit(&thread->kqwl_owning_count,
763 memory_order_relaxed);
764 }
765
766 /*
767 * thread_inspect_deallocate:
768 *
769 * Drop a thread inspection reference.
770 */
771 void
772 thread_inspect_deallocate(
773 thread_inspect_t thread_inspect)
774 {
775 return(thread_deallocate((thread_t)thread_inspect));
776 }
777
778 /*
779 * thread_exception_daemon:
780 *
781 * Deliver EXC_{RESOURCE,GUARD} exception
782 */
783 static void
784 thread_exception_daemon(void)
785 {
786 struct thread_exception_elt *elt;
787 task_t task;
788 thread_t thread;
789 exception_type_t etype;
790
791 simple_lock(&thread_exception_lock);
792 while ((elt = (struct thread_exception_elt *)dequeue_head(&thread_exception_queue)) != NULL) {
793 simple_unlock(&thread_exception_lock);
794
795 etype = elt->exception_type;
796 task = elt->exception_task;
797 thread = elt->exception_thread;
798 assert_thread_magic(thread);
799
800 kfree(elt, sizeof (*elt));
801
802 /* wait for all the threads in the task to terminate */
803 task_lock(task);
804 task_wait_till_threads_terminate_locked(task);
805 task_unlock(task);
806
807 /* Consumes the task ref returned by task_generate_corpse_internal */
808 task_deallocate(task);
809 /* Consumes the thread ref returned by task_generate_corpse_internal */
810 thread_deallocate(thread);
811
812 /* Deliver the notification, also clears the corpse. */
813 task_deliver_crash_notification(task, thread, etype, 0);
814
815 simple_lock(&thread_exception_lock);
816 }
817
818 assert_wait((event_t)&thread_exception_queue, THREAD_UNINT);
819 simple_unlock(&thread_exception_lock);
820
821 thread_block((thread_continue_t)thread_exception_daemon);
822 }
823
824 /*
825 * thread_exception_enqueue:
826 *
827 * Enqueue a corpse port to be delivered an EXC_{RESOURCE,GUARD}.
828 */
829 void
830 thread_exception_enqueue(
831 task_t task,
832 thread_t thread,
833 exception_type_t etype)
834 {
835 assert(EXC_RESOURCE == etype || EXC_GUARD == etype);
836 struct thread_exception_elt *elt = kalloc(sizeof (*elt));
837 elt->exception_type = etype;
838 elt->exception_task = task;
839 elt->exception_thread = thread;
840
841 simple_lock(&thread_exception_lock);
842 enqueue_tail(&thread_exception_queue, (queue_entry_t)elt);
843 simple_unlock(&thread_exception_lock);
844
845 thread_wakeup((event_t)&thread_exception_queue);
846 }
847
848 /*
849 * thread_copy_resource_info
850 *
851 * Copy the resource info counters from source
852 * thread to destination thread.
853 */
854 void
855 thread_copy_resource_info(
856 thread_t dst_thread,
857 thread_t src_thread)
858 {
859 dst_thread->thread_tag = src_thread->thread_tag;
860 dst_thread->c_switch = src_thread->c_switch;
861 dst_thread->p_switch = src_thread->p_switch;
862 dst_thread->ps_switch = src_thread->ps_switch;
863 dst_thread->precise_user_kernel_time = src_thread->precise_user_kernel_time;
864 dst_thread->user_timer = src_thread->user_timer;
865 dst_thread->user_timer_save = src_thread->user_timer_save;
866 dst_thread->system_timer = src_thread->system_timer;
867 dst_thread->system_timer_save = src_thread->system_timer_save;
868 dst_thread->vtimer_user_save = src_thread->vtimer_user_save;
869 dst_thread->vtimer_prof_save = src_thread->vtimer_prof_save;
870 dst_thread->vtimer_rlim_save = src_thread->vtimer_rlim_save;
871 dst_thread->vtimer_qos_save = src_thread->vtimer_qos_save;
872 dst_thread->syscalls_unix = src_thread->syscalls_unix;
873 dst_thread->syscalls_mach = src_thread->syscalls_mach;
874 ledger_rollup(dst_thread->t_threadledger, src_thread->t_threadledger);
875 *dst_thread->thread_io_stats = *src_thread->thread_io_stats;
876 }
877
878 /*
879 * thread_terminate_daemon:
880 *
881 * Perform final clean up for terminating threads.
882 */
883 static void
884 thread_terminate_daemon(void)
885 {
886 thread_t self, thread;
887 task_t task;
888
889 self = current_thread();
890 self->options |= TH_OPT_SYSTEM_CRITICAL;
891
892 (void)splsched();
893 simple_lock(&thread_terminate_lock);
894
895 while ((thread = qe_dequeue_head(&thread_terminate_queue, struct thread, runq_links)) != THREAD_NULL) {
896 assert_thread_magic(thread);
897
898 /*
899 		 * If marked for crash reporting, skip reaping.
900 		 * The corpse delivery thread will clear the bit and enqueue
901 		 * the thread for reaping when done.
902 */
903 if (thread->inspection){
904 enqueue_tail(&crashed_threads_queue, &thread->runq_links);
905 continue;
906 }
907
908 simple_unlock(&thread_terminate_lock);
909 (void)spllo();
910
911 task = thread->task;
912
913 task_lock(task);
914 task->total_user_time += timer_grab(&thread->user_timer);
915 task->total_ptime += timer_grab(&thread->ptime);
916 if (thread->precise_user_kernel_time) {
917 task->total_system_time += timer_grab(&thread->system_timer);
918 } else {
919 task->total_user_time += timer_grab(&thread->system_timer);
920 }
921
922 task->c_switch += thread->c_switch;
923 task->p_switch += thread->p_switch;
924 task->ps_switch += thread->ps_switch;
925
926 task->syscalls_unix += thread->syscalls_unix;
927 task->syscalls_mach += thread->syscalls_mach;
928
929 task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
930 task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
931 task->task_gpu_ns += ml_gpu_stat(thread);
932 task->task_energy += ml_energy_stat(thread);
933
934 #if MONOTONIC
935 mt_terminate_update(task, thread);
936 #endif /* MONOTONIC */
937
938 thread_update_qos_cpu_time(thread);
939
940 queue_remove(&task->threads, thread, thread_t, task_threads);
941 task->thread_count--;
942
943 /*
944 * If the task is being halted, and there is only one thread
945 * left in the task after this one, then wakeup that thread.
946 */
947 if (task->thread_count == 1 && task->halting)
948 thread_wakeup((event_t)&task->halting);
949
950 task_unlock(task);
951
952 lck_mtx_lock(&tasks_threads_lock);
953 queue_remove(&threads, thread, thread_t, threads);
954 threads_count--;
955 lck_mtx_unlock(&tasks_threads_lock);
956
957 thread_deallocate(thread);
958
959 (void)splsched();
960 simple_lock(&thread_terminate_lock);
961 }
962
963 assert_wait((event_t)&thread_terminate_queue, THREAD_UNINT);
964 simple_unlock(&thread_terminate_lock);
965 /* splsched */
966
967 self->options &= ~TH_OPT_SYSTEM_CRITICAL;
968 thread_block((thread_continue_t)thread_terminate_daemon);
969 /*NOTREACHED*/
970 }
971
972 /*
973 * thread_terminate_enqueue:
974 *
975 * Enqueue a terminating thread for final disposition.
976 *
977 * Called at splsched.
978 */
979 void
980 thread_terminate_enqueue(
981 thread_t thread)
982 {
983 KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE, thread->thread_id);
984
985 simple_lock(&thread_terminate_lock);
986 enqueue_tail(&thread_terminate_queue, &thread->runq_links);
987 simple_unlock(&thread_terminate_lock);
988
989 thread_wakeup((event_t)&thread_terminate_queue);
990 }
991
992 /*
993 * thread_terminate_crashed_threads:
994  * Walk the list of crashed threads and move the threads that are
995  * no longer being inspected back onto the terminate queue.
996 */
997 void
998 thread_terminate_crashed_threads()
999 {
1000 thread_t th_remove;
1001 boolean_t should_wake_terminate_queue = FALSE;
1002
1003 simple_lock(&thread_terminate_lock);
1004 /*
1005 	 * Loop through the crashed threads queue and re-queue, for
1006 	 * termination, any threads that are no longer being inspected.
1007 */
1008
1009 qe_foreach_element_safe(th_remove, &crashed_threads_queue, runq_links) {
1010 /* make sure current_thread is never in crashed queue */
1011 assert(th_remove != current_thread());
1012
1013 if (th_remove->inspection == FALSE) {
1014 re_queue_tail(&thread_terminate_queue, &th_remove->runq_links);
1015 should_wake_terminate_queue = TRUE;
1016 }
1017 }
1018
1019 simple_unlock(&thread_terminate_lock);
1020 if (should_wake_terminate_queue == TRUE) {
1021 thread_wakeup((event_t)&thread_terminate_queue);
1022 }
1023 }
1024
1025 /*
1026 * thread_stack_daemon:
1027 *
1028 * Perform stack allocation as required due to
1029 * invoke failures.
1030 */
1031 static void
1032 thread_stack_daemon(void)
1033 {
1034 thread_t thread;
1035 spl_t s;
1036
1037 s = splsched();
1038 simple_lock(&thread_stack_lock);
1039
1040 while ((thread = qe_dequeue_head(&thread_stack_queue, struct thread, runq_links)) != THREAD_NULL) {
1041 assert_thread_magic(thread);
1042
1043 simple_unlock(&thread_stack_lock);
1044 splx(s);
1045
1046 /* allocate stack with interrupts enabled so that we can call into VM */
1047 stack_alloc(thread);
1048
1049 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_END, thread_tid(thread), 0, 0, 0, 0);
1050
1051 s = splsched();
1052 thread_lock(thread);
1053 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1054 thread_unlock(thread);
1055
1056 simple_lock(&thread_stack_lock);
1057 }
1058
1059 assert_wait((event_t)&thread_stack_queue, THREAD_UNINT);
1060 simple_unlock(&thread_stack_lock);
1061 splx(s);
1062
1063 thread_block((thread_continue_t)thread_stack_daemon);
1064 /*NOTREACHED*/
1065 }
1066
1067 /*
1068 * thread_stack_enqueue:
1069 *
1070 * Enqueue a thread for stack allocation.
1071 *
1072 * Called at splsched.
1073 */
1074 void
1075 thread_stack_enqueue(
1076 thread_t thread)
1077 {
1078 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_START, thread_tid(thread), 0, 0, 0, 0);
1079 assert_thread_magic(thread);
1080
1081 simple_lock(&thread_stack_lock);
1082 enqueue_tail(&thread_stack_queue, &thread->runq_links);
1083 simple_unlock(&thread_stack_lock);
1084
1085 thread_wakeup((event_t)&thread_stack_queue);
1086 }
1087
1088 void
1089 thread_daemon_init(void)
1090 {
1091 kern_return_t result;
1092 thread_t thread = NULL;
1093
1094 simple_lock_init(&thread_terminate_lock, 0);
1095 queue_init(&thread_terminate_queue);
1096 queue_init(&crashed_threads_queue);
1097
1098 result = kernel_thread_start_priority((thread_continue_t)thread_terminate_daemon, NULL, MINPRI_KERNEL, &thread);
1099 if (result != KERN_SUCCESS)
1100 panic("thread_daemon_init: thread_terminate_daemon");
1101
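	/*
	 * Descriptive note: kernel_thread_start_priority() hands back a
	 * reference on the new thread; the daemon holds its own reference,
	 * so the creation reference is dropped here (and likewise for the
	 * stack and exception daemons below).
	 */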
1102 thread_deallocate(thread);
1103
1104 simple_lock_init(&thread_stack_lock, 0);
1105 queue_init(&thread_stack_queue);
1106
1107 result = kernel_thread_start_priority((thread_continue_t)thread_stack_daemon, NULL, BASEPRI_PREEMPT_HIGH, &thread);
1108 if (result != KERN_SUCCESS)
1109 panic("thread_daemon_init: thread_stack_daemon");
1110
1111 thread_deallocate(thread);
1112
1113 simple_lock_init(&thread_exception_lock, 0);
1114 queue_init(&thread_exception_queue);
1115
1116 result = kernel_thread_start_priority((thread_continue_t)thread_exception_daemon, NULL, MINPRI_KERNEL, &thread);
1117 if (result != KERN_SUCCESS)
1118 panic("thread_daemon_init: thread_exception_daemon");
1119
1120 thread_deallocate(thread);
1121 }
1122
1123 #define TH_OPTION_NONE 0x00
1124 #define TH_OPTION_NOCRED 0x01
1125 #define TH_OPTION_NOSUSP 0x02
1126
1127 /*
1128 * Create a new thread.
1129 * Doesn't start the thread running.
1130 *
1131 * Task and tasks_threads_lock are returned locked on success.
1132 */
1133 static kern_return_t
1134 thread_create_internal(
1135 task_t parent_task,
1136 integer_t priority,
1137 thread_continue_t continuation,
1138 int options,
1139 thread_t *out_thread)
1140 {
1141 thread_t new_thread;
1142 static thread_t first_thread;
1143
1144 /*
1145 * Allocate a thread and initialize static fields
1146 */
1147 if (first_thread == THREAD_NULL)
1148 new_thread = first_thread = current_thread();
1149 else
1150 new_thread = (thread_t)zalloc(thread_zone);
1151 if (new_thread == THREAD_NULL)
1152 return (KERN_RESOURCE_SHORTAGE);
1153
1154 if (new_thread != first_thread)
1155 *new_thread = thread_template;
1156
1157 #ifdef MACH_BSD
1158 new_thread->uthread = uthread_alloc(parent_task, new_thread, (options & TH_OPTION_NOCRED) != 0);
1159 if (new_thread->uthread == NULL) {
1160 #if MACH_ASSERT
1161 new_thread->thread_magic = 0;
1162 #endif /* MACH_ASSERT */
1163
1164 zfree(thread_zone, new_thread);
1165 return (KERN_RESOURCE_SHORTAGE);
1166 }
1167 #endif /* MACH_BSD */
1168
1169 if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
1170 #ifdef MACH_BSD
1171 void *ut = new_thread->uthread;
1172
1173 new_thread->uthread = NULL;
1174 /* cred free may not be necessary */
1175 uthread_cleanup(parent_task, ut, parent_task->bsd_info);
1176 uthread_cred_free(ut);
1177 uthread_zone_free(ut);
1178 #endif /* MACH_BSD */
1179
1180 #if MACH_ASSERT
1181 new_thread->thread_magic = 0;
1182 #endif /* MACH_ASSERT */
1183
1184 zfree(thread_zone, new_thread);
1185 return (KERN_FAILURE);
1186 }
1187
1188 new_thread->task = parent_task;
1189
1190 thread_lock_init(new_thread);
1191 wake_lock_init(new_thread);
1192
1193 lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr);
1194
1195 ipc_thread_init(new_thread);
1196
1197 new_thread->continuation = continuation;
1198
1199 /* Allocate I/O Statistics structure */
1200 new_thread->thread_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1201 assert(new_thread->thread_io_stats != NULL);
1202 bzero(new_thread->thread_io_stats, sizeof(struct io_stat_info));
1203 new_thread->sync_ipc_overrides = 0;
1204
1205 #if KASAN
1206 kasan_init_thread(&new_thread->kasan_data);
1207 #endif
1208
1209 #if CONFIG_IOSCHED
1210 /* Clear out the I/O Scheduling info for AppleFSCompression */
1211 new_thread->decmp_upl = NULL;
1212 #endif /* CONFIG_IOSCHED */
1213
1214 lck_mtx_lock(&tasks_threads_lock);
1215 task_lock(parent_task);
1216
1217 /*
1218 	 * Fail thread creation if the parent task is being torn down or has too many threads.
1219 	 * If the caller asked for TH_OPTION_NOSUSP, also fail if the parent task is suspended.
1220 */
1221 if (parent_task->active == 0 || parent_task->halting ||
1222 (parent_task->suspend_count > 0 && (options & TH_OPTION_NOSUSP) != 0) ||
1223 (parent_task->thread_count >= task_threadmax && parent_task != kernel_task)) {
1224 task_unlock(parent_task);
1225 lck_mtx_unlock(&tasks_threads_lock);
1226
1227 #ifdef MACH_BSD
1228 {
1229 void *ut = new_thread->uthread;
1230
1231 new_thread->uthread = NULL;
1232 uthread_cleanup(parent_task, ut, parent_task->bsd_info);
1233 /* cred free may not be necessary */
1234 uthread_cred_free(ut);
1235 uthread_zone_free(ut);
1236 }
1237 #endif /* MACH_BSD */
1238 ipc_thread_disable(new_thread);
1239 ipc_thread_terminate(new_thread);
1240 kfree(new_thread->thread_io_stats, sizeof(struct io_stat_info));
1241 lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp);
1242 machine_thread_destroy(new_thread);
1243 zfree(thread_zone, new_thread);
1244 return (KERN_FAILURE);
1245 }
1246
1247 /* New threads inherit any default state on the task */
1248 machine_thread_inherit_taskwide(new_thread, parent_task);
1249
1250 task_reference_internal(parent_task);
1251
1252 if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
1253 /*
1254 * This task has a per-thread CPU limit; make sure this new thread
1255 * gets its limit set too, before it gets out of the kernel.
1256 */
1257 act_set_astledger(new_thread);
1258 }
1259
1260 /* Instantiate a thread ledger. Do not fail thread creation if ledger creation fails. */
1261 if ((new_thread->t_threadledger = ledger_instantiate(thread_ledger_template,
1262 LEDGER_CREATE_INACTIVE_ENTRIES)) != LEDGER_NULL) {
1263
1264 ledger_entry_setactive(new_thread->t_threadledger, thread_ledgers.cpu_time);
1265 }
1266
1267 new_thread->t_bankledger = LEDGER_NULL;
1268 new_thread->t_deduct_bank_ledger_time = 0;
1269 new_thread->t_deduct_bank_ledger_energy = 0;
1270
1271 new_thread->t_ledger = new_thread->task->ledger;
1272 if (new_thread->t_ledger)
1273 ledger_reference(new_thread->t_ledger);
1274
1275 #if defined(CONFIG_SCHED_MULTIQ)
1276 /* Cache the task's sched_group */
1277 new_thread->sched_group = parent_task->sched_group;
1278 #endif /* defined(CONFIG_SCHED_MULTIQ) */
1279
1280 /* Cache the task's map */
1281 new_thread->map = parent_task->map;
1282
1283 timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
1284 timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);
1285
1286 #if KPC
1287 kpc_thread_create(new_thread);
1288 #endif
1289
1290 /* Set the thread's scheduling parameters */
1291 new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
1292 new_thread->max_priority = parent_task->max_priority;
1293 new_thread->task_priority = parent_task->priority;
1294
1295 	int new_priority = (priority < 0) ? parent_task->priority : priority;
1297 if (new_priority > new_thread->max_priority)
1298 new_priority = new_thread->max_priority;
1299 #if CONFIG_EMBEDDED
1300 if (new_priority < MAXPRI_THROTTLE) {
1301 new_priority = MAXPRI_THROTTLE;
1302 }
1303 #endif /* CONFIG_EMBEDDED */
1304
1305 new_thread->importance = new_priority - new_thread->task_priority;
1306
1307 sched_set_thread_base_priority(new_thread, new_priority);
1308
1309 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
1310 new_thread->sched_stamp = sched_tick;
1311 new_thread->pri_shift = sched_pri_shifts[new_thread->th_sched_bucket];
1312 #endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) */
1313
1314 #if CONFIG_EMBEDDED
1315 if (parent_task->max_priority <= MAXPRI_THROTTLE)
1316 sched_thread_mode_demote(new_thread, TH_SFLAG_THROTTLED);
1317 #endif /* CONFIG_EMBEDDED */
1318
1319 thread_policy_create(new_thread);
1320
1321 /* Chain the thread onto the task's list */
1322 queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
1323 parent_task->thread_count++;
1324
1325 /* So terminating threads don't need to take the task lock to decrement */
1326 hw_atomic_add(&parent_task->active_thread_count, 1);
1327
1328 /* Protected by the tasks_threads_lock */
1329 new_thread->thread_id = ++thread_unique_id;
1330
1331
1332 queue_enter(&threads, new_thread, thread_t, threads);
1333 threads_count++;
1334
1335 new_thread->active = TRUE;
1336 if (task_is_a_corpse_fork(parent_task)) {
1337 /* Set the inspection bit if the task is a corpse fork */
1338 new_thread->inspection = TRUE;
1339 } else {
1340 new_thread->inspection = FALSE;
1341 }
1342 new_thread->corpse_dup = FALSE;
1343 *out_thread = new_thread;
1344
1345 if (kdebug_enable) {
1346 long args[4] = {};
1347
1348 kdbg_trace_data(parent_task->bsd_info, &args[1], &args[3]);
1349
1350 /*
1351 * Starting with 26604425, exec'ing creates a new task/thread.
1352 *
1353 * NEWTHREAD in the current process has two possible meanings:
1354 *
1355 * 1) Create a new thread for this process.
1356 * 2) Create a new thread for the future process this will become in an
1357 * exec.
1358 *
1359 * To disambiguate these, arg3 will be set to TRUE for case #2.
1360 *
1361 * The value we need to find (TPF_EXEC_COPY) is stable in the case of a
1362 * task exec'ing. The read of t_procflags does not take the proc_lock.
1363 */
1364 args[2] = task_is_exec_copy(parent_task) ? 1 : 0;
1365
1366 KDBG_RELEASE(TRACE_DATA_NEWTHREAD, (uintptr_t)thread_tid(new_thread),
1367 args[1], args[2], args[3]);
1368
1369 kdbg_trace_string(parent_task->bsd_info, &args[0], &args[1],
1370 &args[2], &args[3]);
1371 KDBG_RELEASE(TRACE_STRING_NEWTHREAD, args[0], args[1], args[2],
1372 args[3]);
1373 }
1374
1375 DTRACE_PROC1(lwp__create, thread_t, *out_thread);
1376
1377 return (KERN_SUCCESS);
1378 }
1379
1380 static kern_return_t
1381 thread_create_internal2(
1382 task_t task,
1383 thread_t *new_thread,
1384 boolean_t from_user,
1385 thread_continue_t continuation)
1386 {
1387 kern_return_t result;
1388 thread_t thread;
1389
1390 if (task == TASK_NULL || task == kernel_task)
1391 return (KERN_INVALID_ARGUMENT);
1392
1393 result = thread_create_internal(task, -1, continuation, TH_OPTION_NONE, &thread);
1394 if (result != KERN_SUCCESS)
1395 return (result);
1396
1397 thread->user_stop_count = 1;
1398 thread_hold(thread);
1399 if (task->suspend_count > 0)
1400 thread_hold(thread);
1401
1402 if (from_user)
1403 extmod_statistics_incr_thread_create(task);
1404
1405 task_unlock(task);
1406 lck_mtx_unlock(&tasks_threads_lock);
1407
1408 *new_thread = thread;
1409
1410 return (KERN_SUCCESS);
1411 }
1412
1413 /* No prototype, since task_server.h has the _from_user version if KERNEL_SERVER */
1414 kern_return_t
1415 thread_create(
1416 task_t task,
1417 thread_t *new_thread);
1418
1419 kern_return_t
1420 thread_create(
1421 task_t task,
1422 thread_t *new_thread)
1423 {
1424 return thread_create_internal2(task, new_thread, FALSE, (thread_continue_t)thread_bootstrap_return);
1425 }
1426
1427 kern_return_t
1428 thread_create_from_user(
1429 task_t task,
1430 thread_t *new_thread)
1431 {
1432 return thread_create_internal2(task, new_thread, TRUE, (thread_continue_t)thread_bootstrap_return);
1433 }
1434
1435 kern_return_t
1436 thread_create_with_continuation(
1437 task_t task,
1438 thread_t *new_thread,
1439 thread_continue_t continuation)
1440 {
1441 return thread_create_internal2(task, new_thread, FALSE, continuation);
1442 }
1443
1444 /*
1445 * Create a thread that is already started, but is waiting on an event
1446 */
1447 static kern_return_t
1448 thread_create_waiting_internal(
1449 task_t task,
1450 thread_continue_t continuation,
1451 event_t event,
1452 int options,
1453 thread_t *new_thread)
1454 {
1455 kern_return_t result;
1456 thread_t thread;
1457
1458 if (task == TASK_NULL || task == kernel_task)
1459 return (KERN_INVALID_ARGUMENT);
1460
1461 result = thread_create_internal(task, -1, continuation, options, &thread);
1462 if (result != KERN_SUCCESS)
1463 return (result);
1464
1465 /* note no user_stop_count or thread_hold here */
1466
1467 if (task->suspend_count > 0)
1468 thread_hold(thread);
1469
1470 thread_mtx_lock(thread);
1471 thread_start_in_assert_wait(thread, event, THREAD_INTERRUPTIBLE);
1472 thread_mtx_unlock(thread);
1473
1474 task_unlock(task);
1475 lck_mtx_unlock(&tasks_threads_lock);
1476
1477 *new_thread = thread;
1478
1479 return (KERN_SUCCESS);
1480 }
1481
1482 kern_return_t
1483 thread_create_waiting(
1484 task_t task,
1485 thread_continue_t continuation,
1486 event_t event,
1487 thread_t *new_thread)
1488 {
1489 return thread_create_waiting_internal(task, continuation, event,
1490 TH_OPTION_NONE, new_thread);
1491 }
1492
1493
1494 static kern_return_t
1495 thread_create_running_internal2(
1496 task_t task,
1497 int flavor,
1498 thread_state_t new_state,
1499 mach_msg_type_number_t new_state_count,
1500 thread_t *new_thread,
1501 boolean_t from_user)
1502 {
1503 kern_return_t result;
1504 thread_t thread;
1505
1506 if (task == TASK_NULL || task == kernel_task)
1507 return (KERN_INVALID_ARGUMENT);
1508
1509 result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, TH_OPTION_NONE, &thread);
1510 if (result != KERN_SUCCESS)
1511 return (result);
1512
1513 if (task->suspend_count > 0)
1514 thread_hold(thread);
1515
1516 result = machine_thread_set_state(thread, flavor, new_state, new_state_count);
1517 if (result != KERN_SUCCESS) {
1518 task_unlock(task);
1519 lck_mtx_unlock(&tasks_threads_lock);
1520
1521 thread_terminate(thread);
1522 thread_deallocate(thread);
1523 return (result);
1524 }
1525
1526 thread_mtx_lock(thread);
1527 thread_start(thread);
1528 thread_mtx_unlock(thread);
1529
1530 if (from_user)
1531 extmod_statistics_incr_thread_create(task);
1532
1533 task_unlock(task);
1534 lck_mtx_unlock(&tasks_threads_lock);
1535
1536 *new_thread = thread;
1537
1538 return (result);
1539 }
1540
1541 /* Prototype, see justification above */
1542 kern_return_t
1543 thread_create_running(
1544 task_t task,
1545 int flavor,
1546 thread_state_t new_state,
1547 mach_msg_type_number_t new_state_count,
1548 thread_t *new_thread);
1549
1550 kern_return_t
1551 thread_create_running(
1552 task_t task,
1553 int flavor,
1554 thread_state_t new_state,
1555 mach_msg_type_number_t new_state_count,
1556 thread_t *new_thread)
1557 {
1558 return thread_create_running_internal2(
1559 task, flavor, new_state, new_state_count,
1560 new_thread, FALSE);
1561 }
1562
1563 kern_return_t
1564 thread_create_running_from_user(
1565 task_t task,
1566 int flavor,
1567 thread_state_t new_state,
1568 mach_msg_type_number_t new_state_count,
1569 thread_t *new_thread)
1570 {
1571 return thread_create_running_internal2(
1572 task, flavor, new_state, new_state_count,
1573 new_thread, TRUE);
1574 }
1575
1576 kern_return_t
1577 thread_create_workq(
1578 task_t task,
1579 thread_continue_t thread_return,
1580 thread_t *new_thread)
1581 {
1582 kern_return_t result;
1583 thread_t thread;
1584
1585 if (task == TASK_NULL || task == kernel_task)
1586 return (KERN_INVALID_ARGUMENT);
1587
1588 result = thread_create_internal(task, -1, thread_return, TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread);
1589 if (result != KERN_SUCCESS)
1590 return (result);
1591
1592 thread->user_stop_count = 1;
1593 thread_hold(thread);
1594 if (task->suspend_count > 0)
1595 thread_hold(thread);
1596
1597 task_unlock(task);
1598 lck_mtx_unlock(&tasks_threads_lock);
1599
1600 *new_thread = thread;
1601
1602 return (KERN_SUCCESS);
1603 }
1604
1605 kern_return_t
1606 thread_create_workq_waiting(
1607 task_t task,
1608 thread_continue_t continuation,
1609 event_t event,
1610 thread_t *new_thread)
1611 {
1612
1613 return thread_create_waiting_internal(task, continuation, event,
1614 TH_OPTION_NOCRED | TH_OPTION_NOSUSP,
1615 new_thread);
1616 }
1617
1618 /*
1619 * kernel_thread_create:
1620 *
1621 * Create a thread in the kernel task
1622 * to execute in kernel context.
1623 */
1624 kern_return_t
1625 kernel_thread_create(
1626 thread_continue_t continuation,
1627 void *parameter,
1628 integer_t priority,
1629 thread_t *new_thread)
1630 {
1631 kern_return_t result;
1632 thread_t thread;
1633 task_t task = kernel_task;
1634
1635 result = thread_create_internal(task, priority, continuation, TH_OPTION_NOCRED | TH_OPTION_NONE, &thread);
1636 if (result != KERN_SUCCESS)
1637 return (result);
1638
1639 task_unlock(task);
1640 lck_mtx_unlock(&tasks_threads_lock);
1641
1642 stack_alloc(thread);
1643 assert(thread->kernel_stack != 0);
1644 #if CONFIG_EMBEDDED
1645 if (priority > BASEPRI_KERNEL)
1646 #endif
1647 thread->reserved_stack = thread->kernel_stack;
1648
1649 thread->parameter = parameter;
1650
1651 if(debug_task & 1)
1652 kprintf("kernel_thread_create: thread = %p continuation = %p\n", thread, continuation);
1653 *new_thread = thread;
1654
1655 return (result);
1656 }
1657
1658 kern_return_t
1659 kernel_thread_start_priority(
1660 thread_continue_t continuation,
1661 void *parameter,
1662 integer_t priority,
1663 thread_t *new_thread)
1664 {
1665 kern_return_t result;
1666 thread_t thread;
1667
1668 result = kernel_thread_create(continuation, parameter, priority, &thread);
1669 if (result != KERN_SUCCESS)
1670 return (result);
1671
1672 *new_thread = thread;
1673
1674 thread_mtx_lock(thread);
1675 thread_start(thread);
1676 thread_mtx_unlock(thread);
1677
1678 return (result);
1679 }
1680
1681 kern_return_t
1682 kernel_thread_start(
1683 thread_continue_t continuation,
1684 void *parameter,
1685 thread_t *new_thread)
1686 {
1687 return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
1688 }
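/*
 * Hedged usage sketch (illustrative only): a typical in-kernel caller
 * starts a daemon thread and immediately drops the reference returned to
 * it, as thread_daemon_init() does above.  "my_daemon" and "my_arg" are
 * hypothetical names supplied by the caller.
 *
 *	thread_t thr;
 *	if (kernel_thread_start((thread_continue_t)my_daemon, my_arg, &thr)
 *	    == KERN_SUCCESS) {
 *		thread_deallocate(thr);	// the thread holds its own reference
 *	}
 */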
1689
1690 /* Separated into helper function so it can be used by THREAD_BASIC_INFO and THREAD_EXTENDED_INFO */
1691 /* It is assumed that the thread is locked by the caller. */
1692 static void
1693 retrieve_thread_basic_info(thread_t thread, thread_basic_info_t basic_info)
1694 {
1695 int state, flags;
1696
1697 /* fill in info */
1698
1699 thread_read_times(thread, &basic_info->user_time,
1700 &basic_info->system_time);
1701
1702 /*
1703 * Update lazy-evaluated scheduler info because someone wants it.
1704 */
1705 if (SCHED(can_update_priority)(thread))
1706 SCHED(update_priority)(thread);
1707
1708 basic_info->sleep_time = 0;
1709
1710 /*
1711 * To calculate cpu_usage, first correct for timer rate,
1712 * then for 5/8 ageing. The correction factor [3/5] is
1713 * (1/(5/8) - 1).
1714 */
1715 basic_info->cpu_usage = 0;
1716 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
1717 if (sched_tick_interval) {
1718 basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
1719 * TH_USAGE_SCALE) / sched_tick_interval);
1720 basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
1721 }
1722 #endif
1723
1724 if (basic_info->cpu_usage > TH_USAGE_SCALE)
1725 basic_info->cpu_usage = TH_USAGE_SCALE;
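	/*
	 * Illustrative example: cpu_usage is scaled so that TH_USAGE_SCALE
	 * represents a full CPU; a thread that consumed roughly half a CPU
	 * over the scheduler tick window reports about TH_USAGE_SCALE / 2
	 * after the 3/5 ageing correction above.
	 */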
1726
1727 basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
1728 POLICY_TIMESHARE: POLICY_RR);
1729
1730 flags = 0;
1731 if (thread->options & TH_OPT_IDLE_THREAD)
1732 flags |= TH_FLAGS_IDLE;
1733
1734 if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
1735 flags |= TH_FLAGS_GLOBAL_FORCED_IDLE;
1736 }
1737
1738 if (!thread->kernel_stack)
1739 flags |= TH_FLAGS_SWAPPED;
1740
1741 state = 0;
1742 if (thread->state & TH_TERMINATE)
1743 state = TH_STATE_HALTED;
1744 else
1745 if (thread->state & TH_RUN)
1746 state = TH_STATE_RUNNING;
1747 else
1748 if (thread->state & TH_UNINT)
1749 state = TH_STATE_UNINTERRUPTIBLE;
1750 else
1751 if (thread->state & TH_SUSP)
1752 state = TH_STATE_STOPPED;
1753 else
1754 if (thread->state & TH_WAIT)
1755 state = TH_STATE_WAITING;
1756
1757 basic_info->run_state = state;
1758 basic_info->flags = flags;
1759
1760 basic_info->suspend_count = thread->user_stop_count;
1761
1762 return;
1763 }
1764
1765 kern_return_t
1766 thread_info_internal(
1767 thread_t thread,
1768 thread_flavor_t flavor,
1769 thread_info_t thread_info_out, /* ptr to OUT array */
1770 mach_msg_type_number_t *thread_info_count) /*IN/OUT*/
1771 {
1772 spl_t s;
1773
1774 if (thread == THREAD_NULL)
1775 return (KERN_INVALID_ARGUMENT);
1776
1777 if (flavor == THREAD_BASIC_INFO) {
1778
1779 if (*thread_info_count < THREAD_BASIC_INFO_COUNT)
1780 return (KERN_INVALID_ARGUMENT);
1781
1782 s = splsched();
1783 thread_lock(thread);
1784
1785 retrieve_thread_basic_info(thread, (thread_basic_info_t) thread_info_out);
1786
1787 thread_unlock(thread);
1788 splx(s);
1789
1790 *thread_info_count = THREAD_BASIC_INFO_COUNT;
1791
1792 return (KERN_SUCCESS);
1793 }
1794 else
1795 if (flavor == THREAD_IDENTIFIER_INFO) {
1796 thread_identifier_info_t identifier_info;
1797
1798 if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT)
1799 return (KERN_INVALID_ARGUMENT);
1800
1801 identifier_info = (thread_identifier_info_t) thread_info_out;
1802
1803 s = splsched();
1804 thread_lock(thread);
1805
1806 identifier_info->thread_id = thread->thread_id;
1807 identifier_info->thread_handle = thread->machine.cthread_self;
1808 identifier_info->dispatch_qaddr = thread_dispatchqaddr(thread);
1809
1810 thread_unlock(thread);
1811 splx(s);
1812 return KERN_SUCCESS;
1813 }
1814 else
1815 if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
1816 policy_timeshare_info_t ts_info;
1817
1818 if (*thread_info_count < POLICY_TIMESHARE_INFO_COUNT)
1819 return (KERN_INVALID_ARGUMENT);
1820
1821 ts_info = (policy_timeshare_info_t)thread_info_out;
1822
1823 s = splsched();
1824 thread_lock(thread);
1825
1826 if (thread->sched_mode != TH_MODE_TIMESHARE) {
1827 thread_unlock(thread);
1828 splx(s);
1829 return (KERN_INVALID_POLICY);
1830 }
1831
1832 ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
1833 if (ts_info->depressed) {
1834 ts_info->base_priority = DEPRESSPRI;
1835 ts_info->depress_priority = thread->base_pri;
1836 }
1837 else {
1838 ts_info->base_priority = thread->base_pri;
1839 ts_info->depress_priority = -1;
1840 }
1841
1842 ts_info->cur_priority = thread->sched_pri;
1843 ts_info->max_priority = thread->max_priority;
1844
1845 thread_unlock(thread);
1846 splx(s);
1847
1848 *thread_info_count = POLICY_TIMESHARE_INFO_COUNT;
1849
1850 return (KERN_SUCCESS);
1851 }
1852 else
1853 if (flavor == THREAD_SCHED_FIFO_INFO) {
1854 if (*thread_info_count < POLICY_FIFO_INFO_COUNT)
1855 return (KERN_INVALID_ARGUMENT);
1856
1857 return (KERN_INVALID_POLICY);
1858 }
1859 else
1860 if (flavor == THREAD_SCHED_RR_INFO) {
1861 policy_rr_info_t rr_info;
1862 uint32_t quantum_time;
1863 uint64_t quantum_ns;
1864
1865 if (*thread_info_count < POLICY_RR_INFO_COUNT)
1866 return (KERN_INVALID_ARGUMENT);
1867
1868 rr_info = (policy_rr_info_t) thread_info_out;
1869
1870 s = splsched();
1871 thread_lock(thread);
1872
1873 if (thread->sched_mode == TH_MODE_TIMESHARE) {
1874 thread_unlock(thread);
1875 splx(s);
1876
1877 return (KERN_INVALID_POLICY);
1878 }
1879
1880 rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
1881 if (rr_info->depressed) {
1882 rr_info->base_priority = DEPRESSPRI;
1883 rr_info->depress_priority = thread->base_pri;
1884 }
1885 else {
1886 rr_info->base_priority = thread->base_pri;
1887 rr_info->depress_priority = -1;
1888 }
1889
1890 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
1891 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
1892
1893 rr_info->max_priority = thread->max_priority;
1894 rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
1895
1896 thread_unlock(thread);
1897 splx(s);
1898
1899 *thread_info_count = POLICY_RR_INFO_COUNT;
1900
1901 return (KERN_SUCCESS);
1902 }
1903 else
1904 if (flavor == THREAD_EXTENDED_INFO) {
1905 thread_basic_info_data_t basic_info;
1906 thread_extended_info_t extended_info = (thread_extended_info_t) thread_info_out;
1907
1908 if (*thread_info_count < THREAD_EXTENDED_INFO_COUNT) {
1909 return (KERN_INVALID_ARGUMENT);
1910 }
1911
1912 s = splsched();
1913 thread_lock(thread);
1914
1915 /* NOTE: This mimics fill_taskthreadinfo(), which is the function used by proc_pidinfo() for
1916 * the PROC_PIDTHREADINFO flavor (which can't be used on corpses)
1917 */
1918 retrieve_thread_basic_info(thread, &basic_info);
1919 extended_info->pth_user_time = ((basic_info.user_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.user_time.microseconds * (integer_t)NSEC_PER_USEC));
1920 extended_info->pth_system_time = ((basic_info.system_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.system_time.microseconds * (integer_t)NSEC_PER_USEC));
1921
1922 extended_info->pth_cpu_usage = basic_info.cpu_usage;
1923 extended_info->pth_policy = basic_info.policy;
1924 extended_info->pth_run_state = basic_info.run_state;
1925 extended_info->pth_flags = basic_info.flags;
1926 extended_info->pth_sleep_time = basic_info.sleep_time;
1927 extended_info->pth_curpri = thread->sched_pri;
1928 extended_info->pth_priority = thread->base_pri;
1929 extended_info->pth_maxpriority = thread->max_priority;
1930
1931 bsd_getthreadname(thread->uthread,extended_info->pth_name);
1932
1933 thread_unlock(thread);
1934 splx(s);
1935
1936 *thread_info_count = THREAD_EXTENDED_INFO_COUNT;
1937
1938 return (KERN_SUCCESS);
1939 }
1940 else
1941 if (flavor == THREAD_DEBUG_INFO_INTERNAL) {
1942 #if DEVELOPMENT || DEBUG
1943 thread_debug_info_internal_t dbg_info;
1944 if (*thread_info_count < THREAD_DEBUG_INFO_INTERNAL_COUNT)
1945 return (KERN_NOT_SUPPORTED);
1946
1947 if (thread_info_out == NULL)
1948 return (KERN_INVALID_ARGUMENT);
1949
1950 dbg_info = (thread_debug_info_internal_t) thread_info_out;
1951 dbg_info->page_creation_count = thread->t_page_creation_count;
1952
1953 *thread_info_count = THREAD_DEBUG_INFO_INTERNAL_COUNT;
1954 return (KERN_SUCCESS);
1955 #endif /* DEVELOPMENT || DEBUG */
1956 return (KERN_NOT_SUPPORTED);
1957 }
1958
1959 return (KERN_INVALID_ARGUMENT);
1960 }
1961
1962 void
1963 thread_read_times(
1964 thread_t thread,
1965 time_value_t *user_time,
1966 time_value_t *system_time)
1967 {
1968 clock_sec_t secs;
1969 clock_usec_t usecs;
1970 uint64_t tval_user, tval_system;
1971
1972 tval_user = timer_grab(&thread->user_timer);
1973 tval_system = timer_grab(&thread->system_timer);
1974
1975 if (thread->precise_user_kernel_time) {
1976 absolutetime_to_microtime(tval_user, &secs, &usecs);
1977 user_time->seconds = (typeof(user_time->seconds))secs;
1978 user_time->microseconds = usecs;
1979
1980 absolutetime_to_microtime(tval_system, &secs, &usecs);
1981 system_time->seconds = (typeof(system_time->seconds))secs;
1982 system_time->microseconds = usecs;
1983 } else {
1984 /* system_timer may represent either sys or user */
1985 tval_user += tval_system;
1986 absolutetime_to_microtime(tval_user, &secs, &usecs);
1987 user_time->seconds = (typeof(user_time->seconds))secs;
1988 user_time->microseconds = usecs;
1989
1990 system_time->seconds = 0;
1991 system_time->microseconds = 0;
1992 }
1993 }
1994
1995 uint64_t thread_get_runtime_self(void)
1996 {
1997 boolean_t interrupt_state;
1998 uint64_t runtime;
1999 thread_t thread = NULL;
2000 processor_t processor = NULL;
2001
2002 thread = current_thread();
2003
2004 /* Not interrupt safe, as the scheduler may otherwise update timer values underneath us */
2005 interrupt_state = ml_set_interrupts_enabled(FALSE);
2006 processor = current_processor();
2007 timer_switch(PROCESSOR_DATA(processor, thread_timer), mach_absolute_time(), PROCESSOR_DATA(processor, thread_timer));
2008 runtime = (timer_grab(&thread->user_timer) + timer_grab(&thread->system_timer));
2009 ml_set_interrupts_enabled(interrupt_state);
2010
2011 return runtime;
2012 }
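
/*
 * A minimal usage sketch (illustrative, not an existing call site): the
 * value returned is the thread's cumulative user + system time in mach
 * absolute-time units, so a caller can bracket a region of work and take
 * the difference; do_work() is a hypothetical placeholder.
 *
 *	uint64_t start = thread_get_runtime_self();
 *	do_work();
 *	uint64_t elapsed_abs = thread_get_runtime_self() - start;
 *	uint64_t elapsed_ns;
 *	absolutetime_to_nanoseconds(elapsed_abs, &elapsed_ns);
 */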
2013
2014 kern_return_t
2015 thread_assign(
2016 __unused thread_t thread,
2017 __unused processor_set_t new_pset)
2018 {
2019 return (KERN_FAILURE);
2020 }
2021
2022 /*
2023 * thread_assign_default:
2024 *
2025 * Special version of thread_assign for assigning threads to default
2026 * processor set.
2027 */
2028 kern_return_t
2029 thread_assign_default(
2030 thread_t thread)
2031 {
2032 return (thread_assign(thread, &pset0));
2033 }
2034
2035 /*
2036 * thread_get_assignment
2037 *
2038 * Return current assignment for this thread.
2039 */
2040 kern_return_t
2041 thread_get_assignment(
2042 thread_t thread,
2043 processor_set_t *pset)
2044 {
2045 if (thread == NULL)
2046 return (KERN_INVALID_ARGUMENT);
2047
2048 *pset = &pset0;
2049
2050 return (KERN_SUCCESS);
2051 }
2052
2053 /*
2054 * thread_wire_internal:
2055 *
2056 * Specify that the target thread must always be able
2057 * to run and to allocate memory.
2058 */
2059 kern_return_t
2060 thread_wire_internal(
2061 host_priv_t host_priv,
2062 thread_t thread,
2063 boolean_t wired,
2064 boolean_t *prev_state)
2065 {
2066 if (host_priv == NULL || thread != current_thread())
2067 return (KERN_INVALID_ARGUMENT);
2068
2069 assert(host_priv == &realhost);
2070
2071 if (prev_state)
2072 *prev_state = (thread->options & TH_OPT_VMPRIV) != 0;
2073
2074 if (wired) {
2075 if (!(thread->options & TH_OPT_VMPRIV))
2076 vm_page_free_reserve(1); /* XXX */
2077 thread->options |= TH_OPT_VMPRIV;
2078 }
2079 else {
2080 if (thread->options & TH_OPT_VMPRIV)
2081 vm_page_free_reserve(-1); /* XXX */
2082 thread->options &= ~TH_OPT_VMPRIV;
2083 }
2084
2085 return (KERN_SUCCESS);
2086 }
2087
2088
2089 /*
2090 * thread_wire:
2091 *
2092 * User-api wrapper for thread_wire_internal()
2093 */
2094 kern_return_t
2095 thread_wire(
2096 host_priv_t host_priv,
2097 thread_t thread,
2098 boolean_t wired)
2099 {
2100 return (thread_wire_internal(host_priv, thread, wired, NULL));
2101 }
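
/*
 * A minimal sketch of the wire/unwire pattern, assuming the caller already
 * holds a host_priv_t reference (host_priv below); saving the previous
 * state lets the caller restore whatever setting was in effect, and only
 * the current thread may be targeted:
 *
 *	boolean_t prev;
 *	thread_wire_internal(host_priv, current_thread(), TRUE, &prev);
 *	// ... work that must always be able to run and allocate memory ...
 *	thread_wire_internal(host_priv, current_thread(), prev, NULL);
 */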
2102
2103
2104 boolean_t
2105 is_vm_privileged(void)
2106 {
2107 return current_thread()->options & TH_OPT_VMPRIV ? TRUE : FALSE;
2108 }
2109
2110 boolean_t
2111 set_vm_privilege(boolean_t privileged)
2112 {
2113 boolean_t was_vmpriv;
2114
2115 if (current_thread()->options & TH_OPT_VMPRIV)
2116 was_vmpriv = TRUE;
2117 else
2118 was_vmpriv = FALSE;
2119
2120 if (privileged != FALSE)
2121 current_thread()->options |= TH_OPT_VMPRIV;
2122 else
2123 current_thread()->options &= ~TH_OPT_VMPRIV;
2124
2125 return (was_vmpriv);
2126 }
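
/*
 * Sketch of the save/restore idiom the return value enables (illustrative
 * only): temporarily mark the current thread VM-privileged and put the old
 * setting back afterwards.
 *
 *	boolean_t was_privileged = set_vm_privilege(TRUE);
 *	// ... allocation path that needs TH_OPT_VMPRIV ...
 *	set_vm_privilege(was_privileged);
 */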
2127
2128 void
2129 set_thread_rwlock_boost(void)
2130 {
2131 current_thread()->rwlock_count++;
2132 }
2133
2134 void
2135 clear_thread_rwlock_boost(void)
2136 {
2137 thread_t thread = current_thread();
2138
2139 if ((thread->rwlock_count-- == 1) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2140
2141 lck_rw_clear_promotion(thread);
2142 }
2143 }
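
/*
 * The boost calls are intended to be used as a pair around rw-lock hold
 * regions on the current thread; a rough sketch (my_lock is hypothetical):
 *
 *	set_thread_rwlock_boost();
 *	lck_rw_lock_shared(my_lock);
 *	// ... critical section ...
 *	lck_rw_unlock_shared(my_lock);
 *	clear_thread_rwlock_boost();
 */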
2144
2145
2146 /*
2147 * XXX assuming current thread only, for now...
2148 */
2149 void
2150 thread_guard_violation(thread_t thread,
2151 mach_exception_data_type_t code, mach_exception_data_type_t subcode)
2152 {
2153 assert(thread == current_thread());
2154 assert(thread->task != kernel_task);
2155
2156 spl_t s = splsched();
2157 /*
2158 * Use the saved state area of the thread structure
2159 * to store all info required to handle the AST when
2160 * returning to userspace
2161 */
2162 assert(EXC_GUARD_DECODE_GUARD_TYPE(code));
2163 thread->guard_exc_info.code = code;
2164 thread->guard_exc_info.subcode = subcode;
2165 thread_ast_set(thread, AST_GUARD);
2166 ast_propagate(thread);
2167
2168 splx(s);
2169 }
2170
2171 /*
2172 * guard_ast:
2173 *
2174 * Handle AST_GUARD for a thread. This routine looks at the
2175 * state saved in the thread structure to determine the cause
2176 * of this exception. Based on this value, it invokes the
2177 * appropriate routine which determines other exception related
2178 * info and raises the exception.
2179 */
2180 void
2181 guard_ast(thread_t t)
2182 {
2183 const mach_exception_data_type_t
2184 code = t->guard_exc_info.code,
2185 subcode = t->guard_exc_info.subcode;
2186
2187 switch (EXC_GUARD_DECODE_GUARD_TYPE(code)) {
2188 case GUARD_TYPE_MACH_PORT:
2189 mach_port_guard_ast(t, code, subcode);
2190 break;
2191 case GUARD_TYPE_FD:
2192 fd_guard_ast(t, code, subcode);
2193 break;
2194 #if CONFIG_VNGUARD
2195 case GUARD_TYPE_VN:
2196 vn_guard_ast(t, code, subcode);
2197 break;
2198 #endif
2199 default:
2200 panic("guard_exc_info %llx %llx", code, subcode);
2201 }
2202 }
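
/*
 * Sketch of how the two halves fit together (illustrative): a subsystem
 * running on the offending thread records the violation, and the pending
 * AST is handled on the way back to user space; my_code/my_subcode are
 * hypothetical values, and my_code must carry a guard type that
 * EXC_GUARD_DECODE_GUARD_TYPE() can recover.
 *
 *	thread_guard_violation(current_thread(), my_code, my_subcode);
 *	// ... later, from the AST_GUARD handler:
 *	guard_ast(current_thread());
 */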
2203
2204 static void
2205 thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1)
2206 {
2207 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
2208 #if CONFIG_TELEMETRY
2209 /*
2210 * This thread is in danger of violating the CPU usage monitor. Enable telemetry
2211 * on the entire task so there are micro-stackshots available if and when
2212 * EXC_RESOURCE is triggered. We could have chosen to enable micro-stackshots
2213 * for this thread only; but now that this task is suspect, knowing what all of
2214 * its threads are up to will be useful.
2215 */
2216 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1);
2217 #endif
2218 return;
2219 }
2220
2221 #if CONFIG_TELEMETRY
2222 /*
2223 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
2224 * exceeded the limit, turn telemetry off for the task.
2225 */
2226 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0);
2227 #endif
2228
2229 if (warning == 0) {
2230 SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU();
2231 }
2232 }
2233
2234 void __attribute__((noinline))
2235 SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void)
2236 {
2237 int pid = 0;
2238 task_t task = current_task();
2239 thread_t thread = current_thread();
2240 uint64_t tid = thread->thread_id;
2241 const char *procname = "unknown";
2242 time_value_t thread_total_time = {0, 0};
2243 time_value_t thread_system_time;
2244 time_value_t thread_user_time;
2245 int action;
2246 uint8_t percentage;
2247 uint32_t usage_percent = 0;
2248 uint32_t interval_sec;
2249 uint64_t interval_ns;
2250 uint64_t balance_ns;
2251 boolean_t fatal = FALSE;
2252 boolean_t send_exc_resource = TRUE; /* in addition to RESOURCE_NOTIFY */
2253 kern_return_t kr;
2254
2255 #ifdef EXC_RESOURCE_MONITORS
2256 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
2257 #endif /* EXC_RESOURCE_MONITORS */
2258 struct ledger_entry_info lei;
2259
2260 assert(thread->t_threadledger != LEDGER_NULL);
2261
2262 /*
2263 * Extract the fatal bit and suspend the monitor (which clears the bit).
2264 */
2265 task_lock(task);
2266 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
2267 fatal = TRUE;
2268 send_exc_resource = TRUE;
2269 }
2270 /* Only one thread can be here at a time. Whichever makes it through
2271 first will successfully suspend the monitor and proceed to send the
2272 notification. Other threads will get an error trying to suspend the
2273 monitor and give up on sending the notification. In the first release,
2274 the monitor won't be resumed for a number of seconds, but we may
2275 eventually need to handle low-latency resume.
2276 */
2277 kr = task_suspend_cpumon(task);
2278 task_unlock(task);
2279 if (kr == KERN_INVALID_ARGUMENT) return;
2280
2281 #ifdef MACH_BSD
2282 pid = proc_selfpid();
2283 if (task->bsd_info != NULL) {
2284 procname = proc_name_address(task->bsd_info);
2285 }
2286 #endif
2287
2288 thread_get_cpulimit(&action, &percentage, &interval_ns);
2289
2290 interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);
2291
2292 thread_read_times(thread, &thread_user_time, &thread_system_time);
2293 time_value_add(&thread_total_time, &thread_user_time);
2294 time_value_add(&thread_total_time, &thread_system_time);
2295 ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);
2296
2297 /* credit/debit/balance/limit are in absolute time units;
2298 the refill info is in nanoseconds. */
2299 absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
2300 if (lei.lei_last_refill > 0) {
2301 usage_percent = (uint32_t)((balance_ns*100ULL) / lei.lei_last_refill);
2302 }
2303
2304 /* TODO: show task total runtime (via TASK_ABSOLUTETIME_INFO)? */
2305 printf("process %s[%d] thread %llu caught burning CPU! "
2306 "It used more than %d%% CPU over %u seconds "
2307 "(actual recent usage: %d%% over ~%llu seconds). "
2308 "Thread lifetime cpu usage %d.%06ds, (%d.%06d user, %d.%06d sys) "
2309 "ledger balance: %lld mabs credit: %lld mabs debit: %lld mabs "
2310 "limit: %llu mabs period: %llu ns last refill: %llu ns%s.\n",
2311 procname, pid, tid,
2312 percentage, interval_sec,
2313 usage_percent,
2314 (lei.lei_last_refill + NSEC_PER_SEC/2) / NSEC_PER_SEC,
2315 thread_total_time.seconds, thread_total_time.microseconds,
2316 thread_user_time.seconds, thread_user_time.microseconds,
2317 thread_system_time.seconds, thread_system_time.microseconds,
2318 lei.lei_balance, lei.lei_credit, lei.lei_debit,
2319 lei.lei_limit, lei.lei_refill_period, lei.lei_last_refill,
2320 (fatal ? " [fatal violation]" : ""));
2321
2322 /*
2323 For now, send RESOURCE_NOTIFY in parallel with EXC_RESOURCE. Once
2324 we have logging parity, we will stop sending EXC_RESOURCE (24508922).
2325 */
2326
2327 /* RESOURCE_NOTIFY MIG specifies nanoseconds of CPU time */
2328 lei.lei_balance = balance_ns;
2329 absolutetime_to_nanoseconds(lei.lei_limit, &lei.lei_limit);
2330 trace_resource_violation(RMON_CPUUSAGE_VIOLATED, &lei);
2331 kr = send_resource_violation(send_cpu_usage_violation, task, &lei,
2332 fatal ? kRNFatalLimitFlag : 0);
2333 if (kr) {
2334 printf("send_resource_violation(CPU usage, ...): error %#x\n", kr);
2335 }
2336
2337 #ifdef EXC_RESOURCE_MONITORS
2338 if (send_exc_resource) {
2339 if (disable_exc_resource) {
2340 printf("process %s[%d] thread %llu caught burning CPU! "
2341 "EXC_RESOURCE%s supressed by a boot-arg\n",
2342 procname, pid, tid, fatal ? " (and termination)" : "");
2343 return;
2344 }
2345
2346 if (audio_active) {
2347 printf("process %s[%d] thread %llu caught burning CPU! "
2348 "EXC_RESOURCE & termination supressed due to audio playback\n",
2349 procname, pid, tid);
2350 return;
2351 }
2352 }
2353
2354
2355 if (send_exc_resource) {
2356 code[0] = code[1] = 0;
2357 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU);
2358 if (fatal) {
2359 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR_FATAL);
2360 } else {
2361 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR);
2362 }
2363 EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec);
2364 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], percentage);
2365 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent);
2366 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
2367 }
2368 #endif /* EXC_RESOURCE_MONITORS */
2369
2370 if (fatal) {
2371 #if CONFIG_JETSAM
2372 jetsam_on_ledger_cpulimit_exceeded();
2373 #else
2374 task_terminate_internal(task);
2375 #endif
2376 }
2377 }
2378
2379 void thread_update_io_stats(thread_t thread, int size, int io_flags)
2380 {
2381 int io_tier;
2382
2383 if (thread->thread_io_stats == NULL || thread->task->task_io_stats == NULL)
2384 return;
2385
2386 if (io_flags & DKIO_READ) {
2387 UPDATE_IO_STATS(thread->thread_io_stats->disk_reads, size);
2388 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->disk_reads, size);
2389 }
2390
2391 if (io_flags & DKIO_META) {
2392 UPDATE_IO_STATS(thread->thread_io_stats->metadata, size);
2393 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->metadata, size);
2394 }
2395
2396 if (io_flags & DKIO_PAGING) {
2397 UPDATE_IO_STATS(thread->thread_io_stats->paging, size);
2398 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->paging, size);
2399 }
2400
2401 io_tier = ((io_flags & DKIO_TIER_MASK) >> DKIO_TIER_SHIFT);
2402 assert (io_tier < IO_NUM_PRIORITIES);
2403
2404 UPDATE_IO_STATS(thread->thread_io_stats->io_priority[io_tier], size);
2405 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->io_priority[io_tier], size);
2406
2407 /* Update Total I/O Counts */
2408 UPDATE_IO_STATS(thread->thread_io_stats->total_io, size);
2409 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->total_io, size);
2410
2411 if (!(io_flags & DKIO_READ)) {
2412 DTRACE_IO3(physical_writes, struct task *, thread->task, uint32_t, size, int, io_flags);
2413 ledger_credit(thread->task->ledger, task_ledgers.physical_writes, size);
2414 }
2415 }
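
/*
 * Sketch of a caller charging one I/O to the current thread (illustrative;
 * the flag composition is an assumption, and io_size is hypothetical): the
 * tier encoded under DKIO_TIER_MASK must stay below IO_NUM_PRIORITIES, per
 * the assert above.
 *
 *	int io_flags = DKIO_READ | DKIO_META | (2 << DKIO_TIER_SHIFT);
 *	thread_update_io_stats(current_thread(), io_size, io_flags);
 */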
2416
2417 static void
2418 init_thread_ledgers(void) {
2419 ledger_template_t t;
2420 int idx;
2421
2422 assert(thread_ledger_template == NULL);
2423
2424 if ((t = ledger_template_create("Per-thread ledger")) == NULL)
2425 panic("couldn't create thread ledger template");
2426
2427 if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
2428 panic("couldn't create cpu_time entry for thread ledger template");
2429 }
2430
2431 if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) {
2432 panic("couldn't set thread ledger callback for cpu_time entry");
2433 }
2434
2435 thread_ledgers.cpu_time = idx;
2436
2437 ledger_template_complete(t);
2438 thread_ledger_template = t;
2439 }
2440
2441 /*
2442 * Returns currently applied CPU usage limit, or 0/0 if none is applied.
2443 */
2444 int
2445 thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns)
2446 {
2447 int64_t abstime = 0;
2448 uint64_t limittime = 0;
2449 thread_t thread = current_thread();
2450
2451 *percentage = 0;
2452 *interval_ns = 0;
2453 *action = 0;
2454
2455 if (thread->t_threadledger == LEDGER_NULL) {
2456 /*
2457 * This thread has no per-thread ledger, so it can't possibly
2458 * have a CPU limit applied.
2459 */
2460 return (KERN_SUCCESS);
2461 }
2462
2463 ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns);
2464 ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime);
2465
2466 if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) {
2467 /*
2468 * This thread's CPU time ledger has no period or limit; so it
2469 * doesn't have a CPU limit applied.
2470 */
2471 return (KERN_SUCCESS);
2472 }
2473
2474 /*
2475 * This calculation is the converse to the one in thread_set_cpulimit().
2476 */
2477 absolutetime_to_nanoseconds(abstime, &limittime);
2478 *percentage = (limittime * 100ULL) / *interval_ns;
2479 assert(*percentage <= 100);
2480
2481 if (thread->options & TH_OPT_PROC_CPULIMIT) {
2482 assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0);
2483
2484 *action = THREAD_CPULIMIT_BLOCK;
2485 } else if (thread->options & TH_OPT_PRVT_CPULIMIT) {
2486 assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0);
2487
2488 *action = THREAD_CPULIMIT_EXCEPTION;
2489 } else {
2490 *action = THREAD_CPULIMIT_DISABLE;
2491 }
2492
2493 return (KERN_SUCCESS);
2494 }
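
/*
 * Reading the limit back is a matter of passing three out-parameters; a
 * minimal sketch:
 *
 *	int action;
 *	uint8_t percentage;
 *	uint64_t interval_ns;
 *	thread_get_cpulimit(&action, &percentage, &interval_ns);
 *	// percentage == 0 indicates that no limit is applied
 */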
2495
2496 /*
2497 * Set CPU usage limit on a thread.
2498 *
2499 * Calling with action THREAD_CPULIMIT_DISABLE removes any limit from this thread.
2500 */
2501 int
2502 thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
2503 {
2504 thread_t thread = current_thread();
2505 ledger_t l;
2506 uint64_t limittime = 0;
2507 uint64_t abstime = 0;
2508
2509 assert(percentage <= 100);
2510
2511 if (action == THREAD_CPULIMIT_DISABLE) {
2512 /*
2513 * Remove CPU limit, if any exists.
2514 */
2515 if (thread->t_threadledger != LEDGER_NULL) {
2516 l = thread->t_threadledger;
2517 ledger_set_limit(l, thread_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
2518 ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_IGNORE);
2519 thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT);
2520 }
2521
2522 return (0);
2523 }
2524
2525 if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) {
2526 return (KERN_INVALID_ARGUMENT);
2527 }
2528
2529 l = thread->t_threadledger;
2530 if (l == LEDGER_NULL) {
2531 /*
2532 * This thread doesn't yet have a per-thread ledger; so create one with the CPU time entry active.
2533 */
2534 if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL)
2535 return (KERN_RESOURCE_SHORTAGE);
2536
2537 /*
2538 * We are the first to create this thread's ledger, so only activate our entry.
2539 */
2540 ledger_entry_setactive(l, thread_ledgers.cpu_time);
2541 thread->t_threadledger = l;
2542 }
2543
2544 /*
2545 * The limit is specified as a percentage of CPU over an interval in nanoseconds.
2546 * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
2547 */
2548 limittime = (interval_ns * percentage) / 100;
2549 nanoseconds_to_absolutetime(limittime, &abstime);
2550 ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct);
2551 /*
2552 * Refill the thread's allotted CPU time every interval_ns nanoseconds.
2553 */
2554 ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);
2555
2556 if (action == THREAD_CPULIMIT_EXCEPTION) {
2557 /*
2558 * We don't support programming the CPU usage monitor on a task if any of its
2559 * threads have a per-thread blocking CPU limit configured.
2560 */
2561 if (thread->options & TH_OPT_PRVT_CPULIMIT) {
2562 panic("CPU usage monitor activated, but blocking thread limit exists");
2563 }
2564
2565 /*
2566 * Make a note that this thread's CPU limit is being used for the task-wide CPU
2567 * usage monitor. We don't have to arm the callback which will trigger the
2568 * exception, because that was done for us in ledger_instantiate (because the
2569 * ledger template used has a default callback).
2570 */
2571 thread->options |= TH_OPT_PROC_CPULIMIT;
2572 } else {
2573 /*
2574 * We deliberately override any CPU limit imposed by a task-wide limit (e.g.
2575 * CPU usage monitor).
2576 */
2577 thread->options &= ~TH_OPT_PROC_CPULIMIT;
2578
2579 thread->options |= TH_OPT_PRVT_CPULIMIT;
2580 /* The per-thread ledger template by default has a callback for CPU time */
2581 ledger_disable_callback(l, thread_ledgers.cpu_time);
2582 ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
2583 }
2584
2585 return (0);
2586 }
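
/*
 * Worked example (illustrative): a blocking limit of 50% over a one-second
 * window yields limittime = (NSEC_PER_SEC * 50) / 100, i.e. 500ms of CPU
 * time per refill period:
 *
 *	thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, 50, NSEC_PER_SEC);
 *
 * and thread_set_cpulimit(THREAD_CPULIMIT_DISABLE, 0, 0) removes any
 * per-thread limit again.
 */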
2587
2588 static void
2589 sched_call_null(
2590 __unused int type,
2591 __unused thread_t thread)
2592 {
2593 return;
2594 }
2595
2596 void
2597 thread_sched_call(
2598 thread_t thread,
2599 sched_call_t call)
2600 {
2601 thread->sched_call = (call != NULL)? call: sched_call_null;
2602 }
2603
2604 sched_call_t
2605 thread_disable_sched_call(
2606 thread_t thread,
2607 sched_call_t call)
2608 {
2609 if (call) {
2610 spl_t s = splsched();
2611 thread_lock(thread);
2612 if (thread->sched_call == call) {
2613 thread->sched_call = sched_call_null;
2614 } else {
2615 call = NULL;
2616 }
2617 thread_unlock(thread);
2618 splx(s);
2619 }
2620 return call;
2621 }
2622
2623 void
2624 thread_reenable_sched_call(
2625 thread_t thread,
2626 sched_call_t call)
2627 {
2628 if (call) {
2629 spl_t s = splsched();
2630 thread_lock(thread);
2631 thread_sched_call(thread, call);
2632 thread_unlock(thread);
2633 splx(s);
2634 }
2635 }
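
/*
 * Sketch of the disable/re-enable pairing (illustrative; my_sched_call is a
 * hypothetical sched_call_t previously installed with thread_sched_call()):
 *
 *	sched_call_t saved = thread_disable_sched_call(thread, my_sched_call);
 *	// ... window during which the callout must not fire ...
 *	thread_reenable_sched_call(thread, saved);
 *
 * If the installed call no longer matches, saved is NULL and the re-enable
 * is a no-op.
 */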
2636
2637 void
2638 thread_static_param(
2639 thread_t thread,
2640 boolean_t state)
2641 {
2642 thread_mtx_lock(thread);
2643 thread->static_param = state;
2644 thread_mtx_unlock(thread);
2645 }
2646
2647 uint64_t
2648 thread_tid(
2649 thread_t thread)
2650 {
2651 return (thread != THREAD_NULL? thread->thread_id: 0);
2652 }
2653
2654 uint16_t thread_set_tag(thread_t th, uint16_t tag) {
2655 return thread_set_tag_internal(th, tag);
2656 }
2657 uint16_t thread_get_tag(thread_t th) {
2658 return thread_get_tag_internal(th);
2659 }
2660
2661 uint64_t
2662 thread_dispatchqaddr(
2663 thread_t thread)
2664 {
2665 uint64_t dispatchqueue_addr;
2666 uint64_t thread_handle;
2667
2668 if (thread == THREAD_NULL)
2669 return 0;
2670
2671 thread_handle = thread->machine.cthread_self;
2672 if (thread_handle == 0)
2673 return 0;
2674
2675 if (thread->inspection == TRUE)
2676 dispatchqueue_addr = thread_handle + get_task_dispatchqueue_offset(thread->task);
2677 else if (thread->task->bsd_info)
2678 dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
2679 else
2680 dispatchqueue_addr = 0;
2681
2682 return dispatchqueue_addr;
2683 }
2684
2685 uint64_t
2686 thread_rettokern_addr(
2687 thread_t thread)
2688 {
2689 uint64_t rettokern_addr;
2690 uint64_t rettokern_offset;
2691 uint64_t thread_handle;
2692
2693 if (thread == THREAD_NULL)
2694 return 0;
2695
2696 thread_handle = thread->machine.cthread_self;
2697 if (thread_handle == 0)
2698 return 0;
2699
2700 if (thread->task->bsd_info) {
2701 rettokern_offset = get_return_to_kernel_offset_from_proc(thread->task->bsd_info);
2702
2703 /* Return 0 if return to kernel offset is not initialized. */
2704 if (rettokern_offset == 0) {
2705 rettokern_addr = 0;
2706 } else {
2707 rettokern_addr = thread_handle + rettokern_offset;
2708 }
2709 } else {
2710 rettokern_addr = 0;
2711 }
2712
2713 return rettokern_addr;
2714 }
2715
2716 /*
2717 * Export routines to other components for things that are done as macros
2718 * within the osfmk component.
2719 */
2720
2721 #undef thread_reference
2722 void thread_reference(thread_t thread);
2723 void
2724 thread_reference(
2725 thread_t thread)
2726 {
2727 if (thread != THREAD_NULL)
2728 thread_reference_internal(thread);
2729 }
2730
2731 #undef thread_should_halt
2732
2733 boolean_t
2734 thread_should_halt(
2735 thread_t th)
2736 {
2737 return (thread_should_halt_fast(th));
2738 }
2739
2740 /*
2741 * thread_set_voucher_name - reset the voucher port name bound to this thread
2742 *
2743 * Conditions: nothing locked
2744 *
2745 * If we already converted the previous name to a cached voucher
2746 * reference, then we discard that reference here. The next lookup
2747 * will cache it again.
2748 */
2749
2750 kern_return_t
2751 thread_set_voucher_name(mach_port_name_t voucher_name)
2752 {
2753 thread_t thread = current_thread();
2754 ipc_voucher_t new_voucher = IPC_VOUCHER_NULL;
2755 ipc_voucher_t voucher;
2756 ledger_t bankledger = NULL;
2757 thread_group_t banktg = NULL;
2758
2759 if (MACH_PORT_DEAD == voucher_name)
2760 return KERN_INVALID_RIGHT;
2761
2762 /*
2763 * aggressively convert to voucher reference
2764 */
2765 if (MACH_PORT_VALID(voucher_name)) {
2766 new_voucher = convert_port_name_to_voucher(voucher_name);
2767 if (IPC_VOUCHER_NULL == new_voucher)
2768 return KERN_INVALID_ARGUMENT;
2769 }
2770 bank_get_bank_ledger_and_thread_group(new_voucher, &bankledger, &banktg);
2771
2772 thread_mtx_lock(thread);
2773 voucher = thread->ith_voucher;
2774 thread->ith_voucher_name = voucher_name;
2775 thread->ith_voucher = new_voucher;
2776 thread_mtx_unlock(thread);
2777
2778 bank_swap_thread_bank_ledger(thread, bankledger);
2779
2780 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2781 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
2782 (uintptr_t)thread_tid(thread),
2783 (uintptr_t)voucher_name,
2784 VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
2785 1, 0);
2786
2787 if (IPC_VOUCHER_NULL != voucher)
2788 ipc_voucher_release(voucher);
2789
2790 return KERN_SUCCESS;
2791 }
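
/*
 * Sketch of adopting and later clearing a voucher by name on the current
 * thread (illustrative; voucher_name is a valid send-right name in the
 * caller's space):
 *
 *	kern_return_t kr = thread_set_voucher_name(voucher_name);
 *	// ...
 *	thread_set_voucher_name(MACH_PORT_NULL);  // drops the cached reference
 */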
2792
2793 /*
2794 * thread_get_mach_voucher - return a voucher reference for the specified thread voucher
2795 *
2796 * Conditions: nothing locked
2797 *
2798 * A reference to the voucher may be lazily pending, if someone set the voucher name
2799 * but nobody has done a lookup yet. In that case, we'll have to do the equivalent
2800 * lookup here.
2801 *
2802 * NOTE: At the moment, there is no distinction between the current and effective
2803 * vouchers because we only set them at the thread level currently.
2804 */
2805 kern_return_t
2806 thread_get_mach_voucher(
2807 thread_act_t thread,
2808 mach_voucher_selector_t __unused which,
2809 ipc_voucher_t *voucherp)
2810 {
2811 ipc_voucher_t voucher;
2812 mach_port_name_t voucher_name;
2813
2814 if (THREAD_NULL == thread)
2815 return KERN_INVALID_ARGUMENT;
2816
2817 thread_mtx_lock(thread);
2818 voucher = thread->ith_voucher;
2819
2820 /* if already cached, just return a ref */
2821 if (IPC_VOUCHER_NULL != voucher) {
2822 ipc_voucher_reference(voucher);
2823 thread_mtx_unlock(thread);
2824 *voucherp = voucher;
2825 return KERN_SUCCESS;
2826 }
2827
2828 voucher_name = thread->ith_voucher_name;
2829
2830 /* convert the name to a port, then voucher reference */
2831 if (MACH_PORT_VALID(voucher_name)) {
2832 ipc_port_t port;
2833
2834 if (KERN_SUCCESS !=
2835 ipc_object_copyin(thread->task->itk_space, voucher_name,
2836 MACH_MSG_TYPE_COPY_SEND, (ipc_object_t *)&port)) {
2837 thread->ith_voucher_name = MACH_PORT_NULL;
2838 thread_mtx_unlock(thread);
2839 *voucherp = IPC_VOUCHER_NULL;
2840 return KERN_SUCCESS;
2841 }
2842
2843 /* convert to a voucher ref to return, and cache a ref on thread */
2844 voucher = convert_port_to_voucher(port);
2845 ipc_voucher_reference(voucher);
2846 thread->ith_voucher = voucher;
2847 thread_mtx_unlock(thread);
2848
2849 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2850 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
2851 (uintptr_t)thread_tid(thread),
2852 (uintptr_t)port,
2853 VM_KERNEL_ADDRPERM((uintptr_t)voucher),
2854 2, 0);
2855
2856
2857 ipc_port_release_send(port);
2858 } else
2859 thread_mtx_unlock(thread);
2860
2861 *voucherp = voucher;
2862 return KERN_SUCCESS;
2863 }
2864
2865 /*
2866 * thread_set_mach_voucher - set a voucher reference for the specified thread voucher
2867 *
2868 * Conditions: caller holds a reference on the voucher.
2869 * nothing locked.
2870 *
2871 * We grab another reference to the voucher and bind it to the thread. Any lazy
2872 * binding is erased. The old voucher reference associated with the thread is
2873 * discarded.
2874 */
2875 kern_return_t
2876 thread_set_mach_voucher(
2877 thread_t thread,
2878 ipc_voucher_t voucher)
2879 {
2880 ipc_voucher_t old_voucher;
2881 ledger_t bankledger = NULL;
2882 thread_group_t banktg = NULL;
2883
2884 if (THREAD_NULL == thread)
2885 return KERN_INVALID_ARGUMENT;
2886
2887 if (thread != current_thread() && thread->started)
2888 return KERN_INVALID_ARGUMENT;
2889
2890 ipc_voucher_reference(voucher);
2891 bank_get_bank_ledger_and_thread_group(voucher, &bankledger, &banktg);
2892
2893 thread_mtx_lock(thread);
2894 old_voucher = thread->ith_voucher;
2895 thread->ith_voucher = voucher;
2896 thread->ith_voucher_name = MACH_PORT_NULL;
2897 thread_mtx_unlock(thread);
2898
2899 bank_swap_thread_bank_ledger(thread, bankledger);
2900
2901 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2902 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
2903 (uintptr_t)thread_tid(thread),
2904 (uintptr_t)MACH_PORT_NULL,
2905 VM_KERNEL_ADDRPERM((uintptr_t)voucher),
2906 3, 0);
2907
2908 ipc_voucher_release(old_voucher);
2909
2910 return KERN_SUCCESS;
2911 }
2912
2913 /*
2914 * thread_swap_mach_voucher - swap a voucher reference for the specified thread voucher
2915 *
2916 * Conditions: caller holds a reference on the new and presumed old voucher(s).
2917 * nothing locked.
2918 *
2919 * If the old voucher is still the same as passed in, replace it with new voucher
2920 * and discard the old (and the reference passed in). Otherwise, discard the new
2921 * and return an updated old voucher.
2922 */
2923 kern_return_t
2924 thread_swap_mach_voucher(
2925 thread_t thread,
2926 ipc_voucher_t new_voucher,
2927 ipc_voucher_t *in_out_old_voucher)
2928 {
2929 mach_port_name_t old_voucher_name;
2930 ipc_voucher_t old_voucher;
2931 ledger_t bankledger = NULL;
2932 thread_group_t banktg = NULL;
2933
2934 if (THREAD_NULL == thread)
2935 return KERN_INVALID_TASK;
2936
2937 if (thread != current_thread() && thread->started)
2938 return KERN_INVALID_ARGUMENT;
2939
2940 bank_get_bank_ledger_and_thread_group(new_voucher, &bankledger, &banktg);
2941
2942 thread_mtx_lock(thread);
2943
2944 old_voucher = thread->ith_voucher;
2945
2946 if (IPC_VOUCHER_NULL == old_voucher) {
2947 old_voucher_name = thread->ith_voucher_name;
2948
2949 /* perform lazy binding if needed */
2950 if (MACH_PORT_VALID(old_voucher_name)) {
2951 old_voucher = convert_port_name_to_voucher(old_voucher_name);
2952 thread->ith_voucher_name = MACH_PORT_NULL;
2953 thread->ith_voucher = old_voucher;
2954
2955 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2956 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
2957 (uintptr_t)thread_tid(thread),
2958 (uintptr_t)old_voucher_name,
2959 VM_KERNEL_ADDRPERM((uintptr_t)old_voucher),
2960 4, 0);
2961
2962 }
2963 }
2964
2965 /* swap in new voucher, if old voucher matches the one supplied */
2966 if (old_voucher == *in_out_old_voucher) {
2967 ipc_voucher_reference(new_voucher);
2968 thread->ith_voucher = new_voucher;
2969 thread->ith_voucher_name = MACH_PORT_NULL;
2970 thread_mtx_unlock(thread);
2971 bank_swap_thread_bank_ledger(thread, bankledger);
2972
2973 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2974 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
2975 (uintptr_t)thread_tid(thread),
2976 (uintptr_t)MACH_PORT_NULL,
2977 VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
2978 5, 0);
2979
2980 ipc_voucher_release(old_voucher);
2981
2982 *in_out_old_voucher = IPC_VOUCHER_NULL;
2983 return KERN_SUCCESS;
2984 }
2985
2986 /* Otherwise, just return old voucher reference */
2987 ipc_voucher_reference(old_voucher);
2988 thread_mtx_unlock(thread);
2989 *in_out_old_voucher = old_voucher;
2990 return KERN_SUCCESS;
2991 }
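
/*
 * The swap behaves like a compare-and-swap on the thread's voucher; a rough
 * sketch (expected and new_voucher are voucher references the caller holds):
 *
 *	ipc_voucher_t old = expected;
 *	kern_return_t kr = thread_swap_mach_voucher(thread, new_voucher, &old);
 *	if (kr == KERN_SUCCESS && old == IPC_VOUCHER_NULL) {
 *		// swap happened; the thread now holds new_voucher
 *	} else if (kr == KERN_SUCCESS) {
 *		// no swap; old holds a fresh reference to the thread's
 *		// current voucher and new_voucher was not adopted
 *	}
 */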
2992
2993 /*
2994 * thread_get_current_voucher_origin_pid - get the pid of the originator of the current voucher.
2995 */
2996 kern_return_t
2997 thread_get_current_voucher_origin_pid(
2998 int32_t *pid)
2999 {
3000 uint32_t buf_size;
3001 kern_return_t kr;
3002 thread_t thread = current_thread();
3003
3004 buf_size = sizeof(*pid);
3005 kr = mach_voucher_attr_command(thread->ith_voucher,
3006 MACH_VOUCHER_ATTR_KEY_BANK,
3007 BANK_ORIGINATOR_PID,
3008 NULL,
3009 0,
3010 (mach_voucher_attr_content_t)pid,
3011 &buf_size);
3012
3013 return kr;
3014 }
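
/*
 * Minimal sketch of retrieving the originator pid of the current thread's
 * voucher:
 *
 *	int32_t pid;
 *	if (thread_get_current_voucher_origin_pid(&pid) == KERN_SUCCESS) {
 *		// pid identifies the process that originated the voucher
 *	}
 */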
3015
3016
3017 boolean_t
3018 thread_has_thread_name(thread_t th)
3019 {
3020 if ((th) && (th->uthread)) {
3021 return bsd_hasthreadname(th->uthread);
3022 }
3023
3024 /*
3025 * This is an odd case; clients may set the thread name based on the lack of
3026 * a name, but in this context there is no uthread to attach the name to.
3027 */
3028 return FALSE;
3029 }
3030
3031 void
3032 thread_set_thread_name(thread_t th, const char* name)
3033 {
3034 if ((th) && (th->uthread) && name) {
3035 bsd_setthreadname(th->uthread, name);
3036 }
3037 }
3038
3039 void
3040 thread_set_honor_qlimit(thread_t thread)
3041 {
3042 thread->options |= TH_OPT_HONOR_QLIMIT;
3043 }
3044
3045 void
3046 thread_clear_honor_qlimit(thread_t thread)
3047 {
3048 thread->options &= (~TH_OPT_HONOR_QLIMIT);
3049 }
3050
3051 /*
3052 * thread_enable_send_importance - set/clear the SEND_IMPORTANCE thread option bit.
3053 */
3054 void thread_enable_send_importance(thread_t thread, boolean_t enable)
3055 {
3056 if (enable == TRUE)
3057 thread->options |= TH_OPT_SEND_IMPORTANCE;
3058 else
3059 thread->options &= ~TH_OPT_SEND_IMPORTANCE;
3060 }
3061
3062 /*
3063 * thread_set_allocation_name - record a kernel allocation name on the current thread, returning the previous one.
3064 */
3065
3066 kern_allocation_name_t thread_set_allocation_name(kern_allocation_name_t new_name)
3067 {
3068 kern_allocation_name_t ret;
3069 thread_kernel_state_t kstate = thread_get_kernel_state(current_thread());
3070 ret = kstate->allocation_name;
3071 // fifo: only install new_name when clearing or when no name is currently set
3072 if (!new_name || !kstate->allocation_name) kstate->allocation_name = new_name;
3073 return ret;
3074 }
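
/*
 * Sketch of the intended bracket pattern (illustrative; site_name is a
 * hypothetical kern_allocation_name_t): the first caller to install a name
 * wins, and restoring the returned value afterwards keeps nesting
 * well-behaved.
 *
 *	kern_allocation_name_t prev = thread_set_allocation_name(site_name);
 *	// ... allocations here are attributed to site_name when no outer
 *	//     name was already in place ...
 *	thread_set_allocation_name(prev);
 */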
3075
3076 #if CONFIG_DTRACE
3077 uint32_t dtrace_get_thread_predcache(thread_t thread)
3078 {
3079 if (thread != THREAD_NULL)
3080 return thread->t_dtrace_predcache;
3081 else
3082 return 0;
3083 }
3084
3085 int64_t dtrace_get_thread_vtime(thread_t thread)
3086 {
3087 if (thread != THREAD_NULL)
3088 return thread->t_dtrace_vtime;
3089 else
3090 return 0;
3091 }
3092
3093 int dtrace_get_thread_last_cpu_id(thread_t thread)
3094 {
3095 if ((thread != THREAD_NULL) && (thread->last_processor != PROCESSOR_NULL)) {
3096 return thread->last_processor->cpu_id;
3097 } else {
3098 return -1;
3099 }
3100 }
3101
3102 int64_t dtrace_get_thread_tracing(thread_t thread)
3103 {
3104 if (thread != THREAD_NULL)
3105 return thread->t_dtrace_tracing;
3106 else
3107 return 0;
3108 }
3109
3110 boolean_t dtrace_get_thread_reentering(thread_t thread)
3111 {
3112 if (thread != THREAD_NULL)
3113 return (thread->options & TH_OPT_DTRACE) ? TRUE : FALSE;
3114 else
3115 return 0;
3116 }
3117
3118 vm_offset_t dtrace_get_kernel_stack(thread_t thread)
3119 {
3120 if (thread != THREAD_NULL)
3121 return thread->kernel_stack;
3122 else
3123 return 0;
3124 }
3125
3126 #if KASAN
3127 struct kasan_thread_data *
3128 kasan_get_thread_data(thread_t thread)
3129 {
3130 return &thread->kasan_data;
3131 }
3132 #endif
3133
3134 int64_t dtrace_calc_thread_recent_vtime(thread_t thread)
3135 {
3136 if (thread != THREAD_NULL) {
3137 processor_t processor = current_processor();
3138 uint64_t abstime = mach_absolute_time();
3139 timer_t timer;
3140
3141 timer = PROCESSOR_DATA(processor, thread_timer);
3142
3143 return timer_grab(&(thread->system_timer)) + timer_grab(&(thread->user_timer)) +
3144 (abstime - timer->tstamp); /* XXX need interrupts off to prevent missed time? */
3145 } else
3146 return 0;
3147 }
3148
3149 void dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
3150 {
3151 if (thread != THREAD_NULL)
3152 thread->t_dtrace_predcache = predcache;
3153 }
3154
3155 void dtrace_set_thread_vtime(thread_t thread, int64_t vtime)
3156 {
3157 if (thread != THREAD_NULL)
3158 thread->t_dtrace_vtime = vtime;
3159 }
3160
3161 void dtrace_set_thread_tracing(thread_t thread, int64_t accum)
3162 {
3163 if (thread != THREAD_NULL)
3164 thread->t_dtrace_tracing = accum;
3165 }
3166
3167 void dtrace_set_thread_reentering(thread_t thread, boolean_t vbool)
3168 {
3169 if (thread != THREAD_NULL) {
3170 if (vbool)
3171 thread->options |= TH_OPT_DTRACE;
3172 else
3173 thread->options &= (~TH_OPT_DTRACE);
3174 }
3175 }
3176
3177 vm_offset_t dtrace_set_thread_recover(thread_t thread, vm_offset_t recover)
3178 {
3179 vm_offset_t prev = 0;
3180
3181 if (thread != THREAD_NULL) {
3182 prev = thread->recover;
3183 thread->recover = recover;
3184 }
3185 return prev;
3186 }
3187
3188 void dtrace_thread_bootstrap(void)
3189 {
3190 task_t task = current_task();
3191
3192 if (task->thread_count == 1) {
3193 thread_t thread = current_thread();
3194 if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) {
3195 thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS;
3196 DTRACE_PROC(exec__success);
3197 KDBG(BSDDBG_CODE(DBG_BSD_PROC,BSD_PROC_EXEC),
3198 task_pid(task));
3199 }
3200 DTRACE_PROC(start);
3201 }
3202 DTRACE_PROC(lwp__start);
3203
3204 }
3205
3206 void
3207 dtrace_thread_didexec(thread_t thread)
3208 {
3209 thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS;
3210 }
3211 #endif /* CONFIG_DTRACE */