/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_FREE_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young, David Golub
 *
 *	Thread management primitives implementation.
 */
/*
 * Copyright (c) 1993 The University of Utah and
 * the Computer Systems Laboratory (CSL).  All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
 * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
 * improvements that they make and grant CSL redistribution rights.
 */
#include <mach/mach_types.h>
#include <mach/boolean.h>
#include <mach/policy.h>
#include <mach/thread_info.h>
#include <mach/thread_special_ports.h>
#include <mach/thread_status.h>
#include <mach/time_value.h>
#include <mach/vm_param.h>

#include <machine/thread.h>
#include <machine/pal_routines.h>
#include <machine/limits.h>

#include <kern/kern_types.h>
#include <kern/kalloc.h>
#include <kern/cpu_data.h>
#include <kern/counters.h>
#include <kern/extmod_statistics.h>
#include <kern/ipc_mig.h>
#include <kern/ipc_tt.h>
#include <kern/mach_param.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/sync_lock.h>
#include <kern/syscall_subr.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/host.h>
#include <kern/zalloc.h>
#include <kern/assert.h>
#include <kern/exc_resource.h>
#include <kern/telemetry.h>
#include <kern/policy_internal.h>

#include <corpses/task_corpse.h>

#include <kern/kpc.h>

#include <ipc/ipc_kmsg.h>
#include <ipc/ipc_port.h>
#include <bank/bank_types.h>

#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <sys/kdebug.h>
#include <sys/bsdtask_info.h>
#include <mach/sdt.h>

/*
 * Exported interfaces
 */
#include <mach/task_server.h>
#include <mach/thread_act_server.h>
#include <mach/mach_host_server.h>
#include <mach/host_priv_server.h>
#include <mach/mach_voucher_server.h>
#include <kern/policy_internal.h>
static struct zone		*thread_zone;
static lck_grp_attr_t	thread_lck_grp_attr;
lck_attr_t				thread_lck_attr;
lck_grp_t				thread_lck_grp;

struct zone				*thread_qos_override_zone;

decl_simple_lock_data(static,thread_stack_lock)
static queue_head_t		thread_stack_queue;

decl_simple_lock_data(static,thread_terminate_lock)
static queue_head_t		thread_terminate_queue;

static queue_head_t		crashed_threads_queue;

decl_simple_lock_data(static,thread_exception_lock)
static queue_head_t		thread_exception_queue;

struct thread_exception_elt {
	task_t		exception_task;
	thread_t	exception_thread;
};

static struct thread	thread_template, init_thread;
static void sched_call_null(
	int			type,
	thread_t	thread);

#ifdef MACH_BSD
extern void proc_exit(void *);
extern mach_exception_data_type_t proc_encode_exit_exception_code(void *);
extern uint64_t get_dispatchqueue_offset_from_proc(void *);
extern int proc_selfpid(void);
extern char * proc_name_address(void *p);
#endif /* MACH_BSD */
extern int disable_exc_resource;
extern int audio_active;
extern int debug_task;
int thread_max = CONFIG_THREAD_MAX;	/* Max number of threads */
int task_threadmax = CONFIG_THREAD_MAX;

static uint64_t		thread_unique_id = 100;

struct _thread_ledger_indices thread_ledgers = { -1 };
static ledger_template_t thread_ledger_template = NULL;

static void init_thread_ledgers(void);

void jetsam_on_ledger_cpulimit_exceeded(void);
/*
 * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry.
 *
 * (ie when any thread's CPU consumption exceeds 70% of the limit, start taking user
 *  stacktraces, aka micro-stackshots)
 */
#define	CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70

int cpumon_ustackshots_trigger_pct;	/* Percentage. Level at which we start gathering telemetry. */
void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void);

/*
 * The smallest interval over which we support limiting CPU consumption is 1ms
 */
#define MINIMUM_CPULIMIT_INTERVAL_MS 1
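
/*
 * Illustrative sketch (not part of the original source): the monitor's
 * trigger level composes with a per-thread CPU limit as a "percentage of a
 * percentage". The hypothetical helper below shows only the arithmetic:
 * with a 50% limit over a 10s interval and the default 70% trigger, the
 * telemetry threshold works out to 10s * 50% * 70% = 3.5s of CPU time.
 */
static inline uint64_t
cpumon_trigger_ns_sketch(uint64_t interval_ns, uint64_t limit_pct, uint64_t trigger_pct)
{
	/* CPU time (ns) consumed within the interval at which micro-stackshots
	   would begin, under the assumptions stated above */
	return (interval_ns * limit_pct / 100) * trigger_pct / 100;
}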
void
thread_bootstrap(void)
{
	/*
	 *	Fill in a template thread for fast initialization.
	 */
#if MACH_ASSERT
	thread_template.thread_magic = THREAD_MAGIC;
#endif /* MACH_ASSERT */

	thread_template.runq = PROCESSOR_NULL;

	thread_template.ref_count = 2;

	thread_template.reason = AST_NONE;
	thread_template.at_safe_point = FALSE;
	thread_template.wait_event = NO_EVENT64;
	thread_template.waitq = NULL;
	thread_template.wait_result = THREAD_WAITING;
	thread_template.options = THREAD_ABORTSAFE;
	thread_template.state = TH_WAIT | TH_UNINT;
	thread_template.wake_active = FALSE;
	thread_template.continuation = THREAD_CONTINUE_NULL;
	thread_template.parameter = NULL;

	thread_template.importance = 0;
	thread_template.sched_mode = TH_MODE_NONE;
	thread_template.sched_flags = 0;
	thread_template.saved_mode = TH_MODE_NONE;
	thread_template.safe_release = 0;
	thread_template.th_sched_bucket = TH_BUCKET_RUN;

	thread_template.sfi_class = SFI_CLASS_UNSPECIFIED;
	thread_template.sfi_wait_class = SFI_CLASS_UNSPECIFIED;

	thread_template.active = 0;
	thread_template.started = 0;
	thread_template.static_param = 0;
	thread_template.policy_reset = 0;

	thread_template.base_pri = BASEPRI_DEFAULT;
	thread_template.sched_pri = 0;
	thread_template.max_priority = 0;
	thread_template.task_priority = 0;
	thread_template.promotions = 0;
	thread_template.pending_promoter_index = 0;
	thread_template.pending_promoter[0] = NULL;
	thread_template.pending_promoter[1] = NULL;
	thread_template.rwlock_count = 0;

	thread_template.realtime.deadline = UINT64_MAX;

	thread_template.quantum_remaining = 0;
	thread_template.last_run_time = 0;
	thread_template.last_made_runnable_time = 0;

	thread_template.computation_metered = 0;
	thread_template.computation_epoch = 0;

#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	thread_template.sched_stamp = 0;
	thread_template.pri_shift = INT8_MAX;
	thread_template.sched_usage = 0;
	thread_template.cpu_usage = thread_template.cpu_delta = 0;
#endif
	thread_template.c_switch = thread_template.p_switch = thread_template.ps_switch = 0;

	thread_template.bound_processor = PROCESSOR_NULL;
	thread_template.last_processor = PROCESSOR_NULL;

	thread_template.sched_call = sched_call_null;

	timer_init(&thread_template.user_timer);
	timer_init(&thread_template.system_timer);
	thread_template.user_timer_save = 0;
	thread_template.system_timer_save = 0;
	thread_template.vtimer_user_save = 0;
	thread_template.vtimer_prof_save = 0;
	thread_template.vtimer_rlim_save = 0;
	thread_template.vtimer_qos_save = 0;

	thread_template.wait_sfi_begin_time = 0;

	thread_template.wait_timer_is_set = FALSE;
	thread_template.wait_timer_active = 0;

	thread_template.depress_timer_active = 0;

	thread_template.recover = (vm_offset_t)NULL;

	thread_template.map = VM_MAP_NULL;

#if CONFIG_DTRACE
	thread_template.t_dtrace_predcache = 0;
	thread_template.t_dtrace_vtime = 0;
	thread_template.t_dtrace_tracing = 0;
#endif /* CONFIG_DTRACE */

#if KPERF
	thread_template.kperf_flags = 0;
	thread_template.kperf_pet_gen = 0;
	thread_template.kperf_c_switch = 0;
	thread_template.kperf_pet_cnt = 0;
#endif

#if KPC
	thread_template.kpc_buf = NULL;
#endif

#if HYPERVISOR
	thread_template.hv_thread_target = NULL;
#endif /* HYPERVISOR */

#if (DEVELOPMENT || DEBUG)
	thread_template.t_page_creation_throttled_hard = 0;
	thread_template.t_page_creation_throttled_soft = 0;
#endif /* DEVELOPMENT || DEBUG */
	thread_template.t_page_creation_throttled = 0;
	thread_template.t_page_creation_count = 0;
	thread_template.t_page_creation_time = 0;

	thread_template.affinity_set = NULL;

	thread_template.syscalls_unix = 0;
	thread_template.syscalls_mach = 0;

	thread_template.t_ledger = LEDGER_NULL;
	thread_template.t_threadledger = LEDGER_NULL;

	thread_template.t_bankledger = LEDGER_NULL;
	thread_template.t_deduct_bank_ledger_time = 0;

	thread_template.requested_policy = (struct thread_requested_policy) {};
	thread_template.effective_policy = (struct thread_effective_policy) {};

	bzero(&thread_template.overrides, sizeof(thread_template.overrides));

	thread_template.iotier_override = THROTTLE_LEVEL_NONE;
	thread_template.thread_io_stats = NULL;
	thread_template.thread_callout_interrupt_wakeups = thread_template.thread_callout_platform_idle_wakeups = 0;

	thread_template.thread_timer_wakeups_bin_1 = thread_template.thread_timer_wakeups_bin_2 = 0;
	thread_template.callout_woken_from_icontext = thread_template.callout_woken_from_platform_idle = 0;

	thread_template.thread_tag = 0;

	thread_template.ith_voucher_name = MACH_PORT_NULL;
	thread_template.ith_voucher = IPC_VOUCHER_NULL;

	thread_template.work_interval_id = 0;

	init_thread = thread_template;
	machine_set_current_thread(&init_thread);
}
extern boolean_t allow_qos_policy_set;

void
thread_init(void)
{
	thread_zone = zinit(
			sizeof(struct thread),
			thread_max * sizeof(struct thread),
			THREAD_CHUNK * sizeof(struct thread),
			"threads");

	thread_qos_override_zone = zinit(
		sizeof(struct thread_qos_override),
		4 * thread_max * sizeof(struct thread_qos_override),
		PAGE_SIZE,
		"thread qos override");
	zone_change(thread_qos_override_zone, Z_EXPAND, TRUE);
	zone_change(thread_qos_override_zone, Z_COLLECT, TRUE);
	zone_change(thread_qos_override_zone, Z_CALLERACCT, FALSE);
	zone_change(thread_qos_override_zone, Z_NOENCRYPT, TRUE);

	lck_grp_attr_setdefault(&thread_lck_grp_attr);
	lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr);
	lck_attr_setdefault(&thread_lck_attr);

	stack_init();

	thread_policy_init();

	/*
	 *	Initialize any machine-dependent
	 *	per-thread structures necessary.
	 */
	machine_thread_init();

	if (!PE_parse_boot_argn("cpumon_ustackshots_trigger_pct", &cpumon_ustackshots_trigger_pct,
			sizeof (cpumon_ustackshots_trigger_pct))) {
		cpumon_ustackshots_trigger_pct = CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT;
	}

	PE_parse_boot_argn("-qos-policy-allow", &allow_qos_policy_set, sizeof(allow_qos_policy_set));

	init_thread_ledgers();
}
void
thread_corpse_continue(void)
{
	thread_t thread = current_thread();

	thread_terminate_internal(thread);
	ml_set_interrupts_enabled(FALSE);
	ast_taken(AST_APC, TRUE);

	panic("thread_corpse_continue");
	/*NOTREACHED*/
}
static void
thread_terminate_continue(void)
{
	panic("thread_terminate_continue");
	/*NOTREACHED*/
}
/*
 *	thread_terminate_self:
 */
void
thread_terminate_self(void)
{
	thread_t	thread = current_thread();
	task_t		task;
	spl_t		s;
	int			threadcnt;

	pal_thread_terminate_self(thread);

	DTRACE_PROC(lwp__exit);

	thread_mtx_lock(thread);

	ipc_thread_disable(thread);

	thread_mtx_unlock(thread);

	s = splsched();
	thread_lock(thread);

	/*
	 *	Cancel priority depression, wait for concurrent expirations
	 *	on other processors.
	 */
	if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
		thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;

		/* If our priority was low because of a depressed yield, restore it in case we block below */
		thread_recompute_sched_pri(thread, FALSE);

		if (timer_call_cancel(&thread->depress_timer))
			thread->depress_timer_active--;
	}

	while (thread->depress_timer_active > 0) {
		thread_unlock(thread);
		splx(s);

		delay(1);

		s = splsched();
		thread_lock(thread);
	}

	thread_sched_call(thread, NULL);

	thread_unlock(thread);
	splx(s);

	thread_mtx_lock(thread);

	thread_policy_reset(thread);

	thread_mtx_unlock(thread);

	task = thread->task;
	uthread_cleanup(task, thread->uthread, task->bsd_info);
	threadcnt = hw_atomic_sub(&task->active_thread_count, 1);

	if (task->bsd_info) {
		/* trace out pid before we sign off */
		long	dbg_arg1 = 0;

		kdbg_trace_data(thread->task->bsd_info, &dbg_arg1);

		KERNEL_DEBUG_CONSTANT(TRACE_DATA_THREAD_TERMINATE_PID | DBG_FUNC_NONE,
				dbg_arg1, 0, 0, 0, 0);
	}

	/*
	 * If we are the last thread to terminate and the task is
	 * associated with a BSD process, perform BSD process exit.
	 */
	if (threadcnt == 0 && task->bsd_info != NULL) {
		mach_exception_data_type_t subcode = 0;

		/* since we're the last thread in this process, trace out the command name too */
		long	dbg_arg1 = 0, dbg_arg2 = 0, dbg_arg3 = 0, dbg_arg4 = 0;

		kdbg_trace_string(thread->task->bsd_info, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);

		KERNEL_DEBUG_CONSTANT(TRACE_STRING_PROC_EXIT | DBG_FUNC_NONE,
				dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);

		/* Get the exit reason before proc_exit */
		subcode = proc_encode_exit_exception_code(task->bsd_info);
		proc_exit(task->bsd_info);

		/*
		 * if there is crash info in task
		 * then do the deliver action since this is
		 * last thread for this task.
		 */
		if (task->corpse_info) {
			task_deliver_crash_notification(task, current_thread(), subcode);
		}
	}

	if (threadcnt == 0) {
		task_lock(task);
		if (task_is_a_corpse_fork(task)) {
			thread_wakeup((event_t)&task->active_thread_count);
		}
		task_unlock(task);
	}

	uthread_cred_free(thread->uthread);

	s = splsched();
	thread_lock(thread);

	/*
	 *	Cancel wait timer, and wait for
	 *	concurrent expirations.
	 */
	if (thread->wait_timer_is_set) {
		thread->wait_timer_is_set = FALSE;

		if (timer_call_cancel(&thread->wait_timer))
			thread->wait_timer_active--;
	}

	while (thread->wait_timer_active > 0) {
		thread_unlock(thread);
		splx(s);

		delay(1);

		s = splsched();
		thread_lock(thread);
	}

	/*
	 *	If there is a reserved stack, release it.
	 */
	if (thread->reserved_stack != 0) {
		stack_free_reserved(thread);
		thread->reserved_stack = 0;
	}

	/*
	 *	Mark thread as terminating, and block.
	 */
	thread->state |= TH_TERMINATE;
	thread_mark_wait_locked(thread, THREAD_UNINT);
	assert((thread->sched_flags & TH_SFLAG_PROMOTED) == 0);
	assert(thread->promotions == 0);
	assert(!(thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED));
	assert(thread->rwlock_count == 0);
	thread_unlock(thread);
	/* splsched */

	thread_block((thread_continue_t)thread_terminate_continue);
	/*NOTREACHED*/
}
/* Drop a thread refcount that definitely isn't the last one. */
void
thread_deallocate_safe(thread_t thread)
{
	assert_thread_magic(thread);

	uint32_t old_refcount = hw_atomic_sub(&(thread)->ref_count, 1) + 1;

	if (__improbable(old_refcount <= 1))
		panic("bad thread refcount: %d", old_refcount);
}
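
/*
 * Sketch of the same "release a reference that cannot be the last" check in
 * isolation (hypothetical helper, for illustration): hw_atomic_sub() returns
 * the post-decrement value, so adding 1 back recovers the count the caller
 * held, which must have been at least 2 for the call to be legal.
 */
static inline void
example_release_not_last(uint32_t *refcount)
{
	uint32_t old = hw_atomic_sub(refcount, 1) + 1;

	if (old <= 1)
		panic("example_release_not_last: dropped the last reference");
}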
void
thread_deallocate(
	thread_t			thread)
{
	task_t				task;

	if (thread == THREAD_NULL)
		return;

	assert_thread_magic(thread);
	assert(thread->ref_count > 0);

	if (__probable(hw_atomic_sub(&(thread)->ref_count, 1) > 0))
		return;

	if (!(thread->state & TH_TERMINATE2))
		panic("thread_deallocate: thread not properly terminated\n");

	assert(thread->runq == PROCESSOR_NULL);

	assert(thread->user_promotions == 0);

	kpc_thread_destroy(thread);

	ipc_thread_terminate(thread);

	proc_thread_qos_deallocate(thread);

	task = thread->task;

#ifdef MACH_BSD
	{
		void *ut = thread->uthread;

		thread->uthread = NULL;
		uthread_zone_free(ut);
	}
#endif /* MACH_BSD */

	if (thread->t_ledger)
		ledger_dereference(thread->t_ledger);
	if (thread->t_threadledger)
		ledger_dereference(thread->t_threadledger);

	if (IPC_VOUCHER_NULL != thread->ith_voucher)
		ipc_voucher_release(thread->ith_voucher);

	if (thread->thread_io_stats)
		kfree(thread->thread_io_stats, sizeof(struct io_stat_info));

	if (thread->kernel_stack != 0)
		stack_free(thread);

	lck_mtx_destroy(&thread->mutex, &thread_lck_grp);
	machine_thread_destroy(thread);

	task_deallocate(task);

#if MACH_ASSERT
	assert_thread_magic(thread);
	thread->thread_magic = 0;
#endif /* MACH_ASSERT */

	zfree(thread_zone, thread);
}
/*
 *	thread_exception_daemon:
 *
 *	Deliver EXC_RESOURCE exception
 */
static void
thread_exception_daemon(void)
{
	struct thread_exception_elt *elt;
	task_t task;
	thread_t thread;

	simple_lock(&thread_exception_lock);
	while ((elt = (struct thread_exception_elt *)dequeue_head(&thread_exception_queue)) != NULL) {
		simple_unlock(&thread_exception_lock);

		task = elt->exception_task;
		thread = elt->exception_thread;
		assert_thread_magic(thread);

		kfree(elt, sizeof(struct thread_exception_elt));

		/* wait for all the threads in the task to terminate */
		task_lock(task);
		task_wait_till_threads_terminate_locked(task);
		task_unlock(task);

		/* Consumes the task ref returned by task_generate_corpse_internal */
		task_deallocate(task);
		/* Consumes the thread ref returned by task_generate_corpse_internal */
		thread_deallocate(thread);

		/* Deliver the EXC_RESOURCE notification, also clears the corpse. */
		task_deliver_crash_notification(task, thread, 0);

		simple_lock(&thread_exception_lock);
	}

	assert_wait((event_t)&thread_exception_queue, THREAD_UNINT);
	simple_unlock(&thread_exception_lock);

	thread_block((thread_continue_t)thread_exception_daemon);
}
/*
 *	thread_exception_enqueue:
 *
 *	Enqueue a corpse port to be delivered an EXC_RESOURCE.
 */
void
thread_exception_enqueue(
	task_t		task,
	thread_t	thread)
{
	struct thread_exception_elt *elt = (struct thread_exception_elt *) kalloc(
			sizeof(struct thread_exception_elt));

	elt->exception_task = task;
	elt->exception_thread = thread;

	simple_lock(&thread_exception_lock);
	enqueue_tail(&thread_exception_queue, (queue_entry_t)elt);
	simple_unlock(&thread_exception_lock);

	thread_wakeup((event_t)&thread_exception_queue);
}
/*
 *	thread_copy_resource_info
 *
 *	Copy the resource info counters from source
 *	thread to destination thread.
 */
void
thread_copy_resource_info(
	thread_t dst_thread,
	thread_t src_thread)
{
	dst_thread->thread_tag = src_thread->thread_tag;
	dst_thread->c_switch = src_thread->c_switch;
	dst_thread->p_switch = src_thread->p_switch;
	dst_thread->ps_switch = src_thread->ps_switch;
	dst_thread->precise_user_kernel_time = src_thread->precise_user_kernel_time;
	dst_thread->user_timer = src_thread->user_timer;
	dst_thread->user_timer_save = src_thread->user_timer_save;
	dst_thread->system_timer_save = src_thread->system_timer_save;
	dst_thread->syscalls_unix = src_thread->syscalls_unix;
	dst_thread->syscalls_mach = src_thread->syscalls_mach;
	ledger_rollup(dst_thread->t_threadledger, src_thread->t_threadledger);
	*dst_thread->thread_io_stats = *src_thread->thread_io_stats;
}
/*
 *	thread_terminate_daemon:
 *
 *	Perform final clean up for terminating threads.
 */
static void
thread_terminate_daemon(void)
{
	thread_t	self, thread;
	task_t		task;

	self = current_thread();
	self->options |= TH_OPT_SYSTEM_CRITICAL;

	(void)splsched();
	simple_lock(&thread_terminate_lock);

	while ((thread = qe_dequeue_head(&thread_terminate_queue, struct thread, runq_links)) != THREAD_NULL) {
		assert_thread_magic(thread);

		/*
		 * if marked for crash reporting, skip reaping.
		 * The corpse delivery thread will clear bit and enqueue
		 * for reaping when done
		 */
		if (thread->inspection) {
			enqueue_tail(&crashed_threads_queue, &thread->runq_links);
			continue;
		}

		simple_unlock(&thread_terminate_lock);
		(void)spllo();

		task = thread->task;

		task_lock(task);
		task->total_user_time += timer_grab(&thread->user_timer);
		if (thread->precise_user_kernel_time) {
			task->total_system_time += timer_grab(&thread->system_timer);
		} else {
			task->total_user_time += timer_grab(&thread->system_timer);
		}

		task->c_switch += thread->c_switch;
		task->p_switch += thread->p_switch;
		task->ps_switch += thread->ps_switch;

		task->syscalls_unix += thread->syscalls_unix;
		task->syscalls_mach += thread->syscalls_mach;

		task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
		task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
		task->task_gpu_ns += ml_gpu_stat(thread);
		task->task_energy += ml_energy_stat(thread);

		thread_update_qos_cpu_time(thread);

		queue_remove(&task->threads, thread, thread_t, task_threads);
		task->thread_count--;

		/*
		 * If the task is being halted, and there is only one thread
		 * left in the task after this one, then wakeup that thread.
		 */
		if (task->thread_count == 1 && task->halting)
			thread_wakeup((event_t)&task->halting);

		task_unlock(task);

		lck_mtx_lock(&tasks_threads_lock);
		queue_remove(&threads, thread, thread_t, threads);
		threads_count--;
		lck_mtx_unlock(&tasks_threads_lock);

		thread_deallocate(thread);

		(void)splsched();
		simple_lock(&thread_terminate_lock);
	}

	assert_wait((event_t)&thread_terminate_queue, THREAD_UNINT);
	simple_unlock(&thread_terminate_lock);
	/* splsched */

	self->options &= ~TH_OPT_SYSTEM_CRITICAL;
	thread_block((thread_continue_t)thread_terminate_daemon);
	/*NOTREACHED*/
}
/*
 *	thread_terminate_enqueue:
 *
 *	Enqueue a terminating thread for final disposition.
 *
 *	Called at splsched.
 */
void
thread_terminate_enqueue(
	thread_t		thread)
{
	KERNEL_DEBUG_CONSTANT(TRACE_DATA_THREAD_TERMINATE | DBG_FUNC_NONE, thread->thread_id, 0, 0, 0, 0);

	simple_lock(&thread_terminate_lock);
	enqueue_tail(&thread_terminate_queue, &thread->runq_links);
	simple_unlock(&thread_terminate_lock);

	thread_wakeup((event_t)&thread_terminate_queue);
}
/*
 * thread_terminate_crashed_threads:
 * walk the list of crashed threads and put back set of threads
 * who are no longer being inspected.
 */
void
thread_terminate_crashed_threads()
{
	thread_t th_remove;
	boolean_t should_wake_terminate_queue = FALSE;

	simple_lock(&thread_terminate_lock);
	/*
	 * loop through the crashed threads queue
	 * to put any threads that are not being inspected anymore
	 */
	qe_foreach_element_safe(th_remove, &crashed_threads_queue, runq_links) {
		/* make sure current_thread is never in crashed queue */
		assert(th_remove != current_thread());

		if (th_remove->inspection == FALSE) {
			re_queue_tail(&thread_terminate_queue, &th_remove->runq_links);
			should_wake_terminate_queue = TRUE;
		}
	}

	simple_unlock(&thread_terminate_lock);
	if (should_wake_terminate_queue == TRUE) {
		thread_wakeup((event_t)&thread_terminate_queue);
	}
}
/*
 *	thread_stack_daemon:
 *
 *	Perform stack allocation as required due to
 *	invoke failures.
 */
static void
thread_stack_daemon(void)
{
	thread_t	thread;
	spl_t		s;

	s = splsched();
	simple_lock(&thread_stack_lock);

	while ((thread = qe_dequeue_head(&thread_stack_queue, struct thread, runq_links)) != THREAD_NULL) {
		assert_thread_magic(thread);

		simple_unlock(&thread_stack_lock);
		splx(s);

		/* allocate stack with interrupts enabled so that we can call into VM */
		stack_alloc(thread);

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_END, thread_tid(thread), 0, 0, 0, 0);

		s = splsched();
		thread_lock(thread);
		thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
		thread_unlock(thread);

		simple_lock(&thread_stack_lock);
	}

	assert_wait((event_t)&thread_stack_queue, THREAD_UNINT);
	simple_unlock(&thread_stack_lock);
	splx(s);

	thread_block((thread_continue_t)thread_stack_daemon);
	/*NOTREACHED*/
}
/*
 *	thread_stack_enqueue:
 *
 *	Enqueue a thread for stack allocation.
 *
 *	Called at splsched.
 */
void
thread_stack_enqueue(
	thread_t		thread)
{
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_START, thread_tid(thread), 0, 0, 0, 0);
	assert_thread_magic(thread);

	simple_lock(&thread_stack_lock);
	enqueue_tail(&thread_stack_queue, &thread->runq_links);
	simple_unlock(&thread_stack_lock);

	thread_wakeup((event_t)&thread_stack_queue);
}
void
thread_daemon_init(void)
{
	kern_return_t	result;
	thread_t		thread = NULL;

	simple_lock_init(&thread_terminate_lock, 0);
	queue_init(&thread_terminate_queue);
	queue_init(&crashed_threads_queue);

	result = kernel_thread_start_priority((thread_continue_t)thread_terminate_daemon, NULL, MINPRI_KERNEL, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_daemon_init: thread_terminate_daemon");

	thread_deallocate(thread);

	simple_lock_init(&thread_stack_lock, 0);
	queue_init(&thread_stack_queue);

	result = kernel_thread_start_priority((thread_continue_t)thread_stack_daemon, NULL, BASEPRI_PREEMPT, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_daemon_init: thread_stack_daemon");

	thread_deallocate(thread);

	simple_lock_init(&thread_exception_lock, 0);
	queue_init(&thread_exception_queue);

	result = kernel_thread_start_priority((thread_continue_t)thread_exception_daemon, NULL, MINPRI_KERNEL, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_daemon_init: thread_exception_daemon");

	thread_deallocate(thread);
}
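
/*
 * All three daemons above follow the same continuation pattern: drain the
 * work queue, assert_wait() on the queue address, then thread_block() with
 * the daemon function itself as the continuation. Blocking through a
 * continuation lets the daemon give up its kernel stack while idle; the
 * wakeup from the matching *_enqueue() routine restarts it from the top.
 */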
#define TH_OPTION_NONE		0x00
#define TH_OPTION_NOCRED	0x01
#define TH_OPTION_NOSUSP	0x02

/*
 * Create a new thread.
 * Doesn't start the thread running.
 *
 * Task and tasks_threads_lock are returned locked on success.
 */
static kern_return_t
thread_create_internal(
	task_t				parent_task,
	integer_t			priority,
	thread_continue_t	continuation,
	int					options,
	thread_t			*out_thread)
{
	thread_t			new_thread;
	static thread_t		first_thread;

	/*
	 *	Allocate a thread and initialize static fields
	 */
	if (first_thread == THREAD_NULL)
		new_thread = first_thread = current_thread();
	else
		new_thread = (thread_t)zalloc(thread_zone);
	if (new_thread == THREAD_NULL)
		return (KERN_RESOURCE_SHORTAGE);

	if (new_thread != first_thread)
		*new_thread = thread_template;

#ifdef MACH_BSD
	new_thread->uthread = uthread_alloc(parent_task, new_thread, (options & TH_OPTION_NOCRED) != 0);
	if (new_thread->uthread == NULL) {
#if MACH_ASSERT
		new_thread->thread_magic = 0;
#endif /* MACH_ASSERT */

		zfree(thread_zone, new_thread);
		return (KERN_RESOURCE_SHORTAGE);
	}
#endif /* MACH_BSD */

	if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
#ifdef MACH_BSD
		void *ut = new_thread->uthread;

		new_thread->uthread = NULL;
		/* cred free may not be necessary */
		uthread_cleanup(parent_task, ut, parent_task->bsd_info);
		uthread_cred_free(ut);
		uthread_zone_free(ut);
#endif /* MACH_BSD */

#if MACH_ASSERT
		new_thread->thread_magic = 0;
#endif /* MACH_ASSERT */

		zfree(thread_zone, new_thread);
		return (KERN_FAILURE);
	}

	new_thread->task = parent_task;

	thread_lock_init(new_thread);
	wake_lock_init(new_thread);

	lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr);

	ipc_thread_init(new_thread);

	new_thread->continuation = continuation;

	/* Allocate I/O Statistics structure */
	new_thread->thread_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
	assert(new_thread->thread_io_stats != NULL);
	bzero(new_thread->thread_io_stats, sizeof(struct io_stat_info));

#if CONFIG_IOSCHED
	/* Clear out the I/O Scheduling info for AppleFSCompression */
	new_thread->decmp_upl = NULL;
#endif /* CONFIG_IOSCHED */

	lck_mtx_lock(&tasks_threads_lock);
	task_lock(parent_task);

	/*
	 * Fail thread creation if parent task is being torn down or has too many threads
	 * If the caller asked for TH_OPTION_NOSUSP, also fail if the parent task is suspended
	 */
	if (parent_task->active == 0 || parent_task->halting ||
	    (parent_task->suspend_count > 0 && (options & TH_OPTION_NOSUSP) != 0) ||
	    (parent_task->thread_count >= task_threadmax && parent_task != kernel_task)) {
		task_unlock(parent_task);
		lck_mtx_unlock(&tasks_threads_lock);

#ifdef MACH_BSD
		{
			void *ut = new_thread->uthread;

			new_thread->uthread = NULL;
			uthread_cleanup(parent_task, ut, parent_task->bsd_info);
			/* cred free may not be necessary */
			uthread_cred_free(ut);
			uthread_zone_free(ut);
		}
#endif /* MACH_BSD */
		ipc_thread_disable(new_thread);
		ipc_thread_terminate(new_thread);
		kfree(new_thread->thread_io_stats, sizeof(struct io_stat_info));
		lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp);
		machine_thread_destroy(new_thread);
		zfree(thread_zone, new_thread);
		return (KERN_FAILURE);
	}
	/* New threads inherit any default state on the task */
	machine_thread_inherit_taskwide(new_thread, parent_task);

	task_reference_internal(parent_task);

	if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
		/*
		 * This task has a per-thread CPU limit; make sure this new thread
		 * gets its limit set too, before it gets out of the kernel.
		 */
		set_astledger(new_thread);
	}

	/* Instantiate a thread ledger. Do not fail thread creation if ledger creation fails. */
	if ((new_thread->t_threadledger = ledger_instantiate(thread_ledger_template,
			LEDGER_CREATE_INACTIVE_ENTRIES)) != LEDGER_NULL) {

		ledger_entry_setactive(new_thread->t_threadledger, thread_ledgers.cpu_time);
	}

	new_thread->t_bankledger = LEDGER_NULL;
	new_thread->t_deduct_bank_ledger_time = 0;

	new_thread->t_ledger = new_thread->task->ledger;
	if (new_thread->t_ledger)
		ledger_reference(new_thread->t_ledger);

#if defined(CONFIG_SCHED_MULTIQ)
	/* Cache the task's sched_group */
	new_thread->sched_group = parent_task->sched_group;
#endif /* defined(CONFIG_SCHED_MULTIQ) */

	/* Cache the task's map */
	new_thread->map = parent_task->map;

	timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
	timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);

	kpc_thread_create(new_thread);

	/* Set the thread's scheduling parameters */
	new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
	new_thread->max_priority = parent_task->max_priority;
	new_thread->task_priority = parent_task->priority;

	int new_priority = (priority < 0) ? parent_task->priority : priority;
	if (new_priority > new_thread->max_priority)
		new_priority = new_thread->max_priority;

	new_thread->importance = new_priority - new_thread->task_priority;

	sched_set_thread_base_priority(new_thread, new_priority);
#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	new_thread->sched_stamp = sched_tick;
	new_thread->pri_shift = sched_pri_shifts[new_thread->th_sched_bucket];
#endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) */

	thread_policy_create(new_thread);

	/* Chain the thread onto the task's list */
	queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
	parent_task->thread_count++;

	/* So terminating threads don't need to take the task lock to decrement */
	hw_atomic_add(&parent_task->active_thread_count, 1);

	/* Protected by the tasks_threads_lock */
	new_thread->thread_id = ++thread_unique_id;

	queue_enter(&threads, new_thread, thread_t, threads);
	threads_count++;

	new_thread->active = TRUE;
	if (task_is_a_corpse_fork(parent_task)) {
		/* Set the inspection bit if the task is a corpse fork */
		new_thread->inspection = TRUE;
	} else {
		new_thread->inspection = FALSE;
	}
	new_thread->corpse_dup = FALSE;
	*out_thread = new_thread;

	{
		long	dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;

		kdbg_trace_data(parent_task->bsd_info, &dbg_arg2);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			TRACE_DATA_NEWTHREAD | DBG_FUNC_NONE,
			(vm_address_t)(uintptr_t)thread_tid(new_thread), dbg_arg2, 0, 0, 0);

		kdbg_trace_string(parent_task->bsd_info,
			&dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			TRACE_STRING_NEWTHREAD | DBG_FUNC_NONE,
			dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
	}

	DTRACE_PROC1(lwp__create, thread_t, *out_thread);

	return (KERN_SUCCESS);
}
static kern_return_t
thread_create_internal2(
	task_t				task,
	thread_t			*new_thread,
	boolean_t			from_user,
	thread_continue_t	continuation)
{
	kern_return_t		result;
	thread_t			thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, continuation, TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	thread->user_stop_count = 1;
	thread_hold(thread);
	if (task->suspend_count > 0)
		thread_hold(thread);

	if (from_user)
		extmod_statistics_incr_thread_create(task);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}
/* No prototype, since task_server.h has the _from_user version if KERNEL_SERVER */
kern_return_t
thread_create(
	task_t				task,
	thread_t			*new_thread);

kern_return_t
thread_create(
	task_t				task,
	thread_t			*new_thread)
{
	return thread_create_internal2(task, new_thread, FALSE, (thread_continue_t)thread_bootstrap_return);
}

kern_return_t
thread_create_from_user(
	task_t				task,
	thread_t			*new_thread)
{
	return thread_create_internal2(task, new_thread, TRUE, (thread_continue_t)thread_bootstrap_return);
}

kern_return_t
thread_create_with_continuation(
	task_t				task,
	thread_t			*new_thread,
	thread_continue_t	continuation)
{
	return thread_create_internal2(task, new_thread, FALSE, continuation);
}
static kern_return_t
thread_create_running_internal2(
	task_t					task,
	int						flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t	new_state_count,
	thread_t				*new_thread,
	boolean_t				from_user)
{
	kern_return_t	result;
	thread_t		thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	if (task->suspend_count > 0)
		thread_hold(thread);

	result = machine_thread_set_state(thread, flavor, new_state, new_state_count);
	if (result != KERN_SUCCESS) {
		task_unlock(task);
		lck_mtx_unlock(&tasks_threads_lock);

		thread_terminate(thread);
		thread_deallocate(thread);
		return (result);
	}

	thread_mtx_lock(thread);
	thread_start(thread);
	thread_mtx_unlock(thread);

	if (from_user)
		extmod_statistics_incr_thread_create(task);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}
/* Prototype, see justification above */
kern_return_t
thread_create_running(
	task_t					task,
	int						flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t	new_state_count,
	thread_t				*new_thread);

kern_return_t
thread_create_running(
	task_t					task,
	int						flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t	new_state_count,
	thread_t				*new_thread)
{
	return thread_create_running_internal2(
			task, flavor, new_state, new_state_count,
			new_thread, FALSE);
}

kern_return_t
thread_create_running_from_user(
	task_t					task,
	int						flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t	new_state_count,
	thread_t				*new_thread)
{
	return thread_create_running_internal2(
			task, flavor, new_state, new_state_count,
			new_thread, TRUE);
}
kern_return_t
thread_create_workq(
	task_t				task,
	thread_continue_t	thread_return,
	thread_t			*new_thread)
{
	kern_return_t		result;
	thread_t			thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, thread_return, TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	thread->user_stop_count = 1;
	thread_hold(thread);
	if (task->suspend_count > 0)
		thread_hold(thread);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}
kern_return_t
thread_create_workq_waiting(
	task_t				task,
	thread_continue_t	thread_return,
	event_t				event,
	thread_t			*new_thread)
{
	thread_t			thread;
	kern_return_t		result;

	if (task == TASK_NULL || task == kernel_task)
		return KERN_INVALID_ARGUMENT;

	result = thread_create_internal(task, -1, thread_return, TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread);
	if (result != KERN_SUCCESS)
		return result;

	if (task->suspend_count > 0)
		thread_hold(thread);

	thread_mtx_lock(thread);
	thread_start_in_assert_wait(thread, event, THREAD_INTERRUPTIBLE);
	thread_mtx_unlock(thread);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return KERN_SUCCESS;
}
/*
 *	kernel_thread_create:
 *
 *	Create a thread in the kernel task
 *	to execute in kernel context.
 */
kern_return_t
kernel_thread_create(
	thread_continue_t	continuation,
	void				*parameter,
	integer_t			priority,
	thread_t			*new_thread)
{
	kern_return_t		result;
	thread_t			thread;
	task_t				task = kernel_task;

	result = thread_create_internal(task, priority, continuation, TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	stack_alloc(thread);
	assert(thread->kernel_stack != 0);
	thread->reserved_stack = thread->kernel_stack;

	thread->parameter = parameter;

	if (debug_task & 1)
		kprintf("kernel_thread_create: thread = %p continuation = %p\n", thread, continuation);
	*new_thread = thread;

	return (result);
}
kern_return_t
kernel_thread_start_priority(
	thread_continue_t	continuation,
	void				*parameter,
	integer_t			priority,
	thread_t			*new_thread)
{
	kern_return_t	result;
	thread_t		thread;

	result = kernel_thread_create(continuation, parameter, priority, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	*new_thread = thread;

	thread_mtx_lock(thread);
	thread_start(thread);
	thread_mtx_unlock(thread);

	return (result);
}
kern_return_t
kernel_thread_start(
	thread_continue_t	continuation,
	void				*parameter,
	thread_t			*new_thread)
{
	return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
}
/* Separated into helper function so it can be used by THREAD_BASIC_INFO and THREAD_EXTENDED_INFO */
/* it is assumed that the thread is locked by the caller */
static void
retrieve_thread_basic_info(thread_t thread, thread_basic_info_t basic_info)
{
	int	state, flags;

	thread_read_times(thread, &basic_info->user_time,
							&basic_info->system_time);

	/*
	 *	Update lazy-evaluated scheduler info because someone wants it.
	 */
	if (SCHED(can_update_priority)(thread))
		SCHED(update_priority)(thread);

	basic_info->sleep_time = 0;

	/*
	 *	To calculate cpu_usage, first correct for timer rate,
	 *	then for 5/8 ageing.  The correction factor [3/5] is
	 *	(1/(5/8) - 1).
	 */
	basic_info->cpu_usage = 0;
#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	if (sched_tick_interval) {
		basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
								* TH_USAGE_SCALE) / sched_tick_interval);
		basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
	}
#endif

	if (basic_info->cpu_usage > TH_USAGE_SCALE)
		basic_info->cpu_usage = TH_USAGE_SCALE;
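
	/*
	 * Worked example of the scaling above (illustrative numbers): with a
	 * TH_USAGE_SCALE of 1000, a thread whose decayed cpu_usage equals
	 * sched_tick_interval first scales to 1000, then the ageing correction
	 * yields 1000 * 3 / 5 = 600 before clamping. The factor 3/5 is
	 * 1/(5/8) - 1 = 8/5 - 1.
	 */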
	basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
								POLICY_TIMESHARE: POLICY_RR);

	flags = 0;
	if (thread->options & TH_OPT_IDLE_THREAD)
		flags |= TH_FLAGS_IDLE;

	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
		flags |= TH_FLAGS_GLOBAL_FORCED_IDLE;
	}

	if (!thread->kernel_stack)
		flags |= TH_FLAGS_SWAPPED;

	state = 0;
	if (thread->state & TH_TERMINATE)
		state = TH_STATE_HALTED;
	else
	if (thread->state & TH_RUN)
		state = TH_STATE_RUNNING;
	else
	if (thread->state & TH_UNINT)
		state = TH_STATE_UNINTERRUPTIBLE;
	else
	if (thread->state & TH_SUSP)
		state = TH_STATE_STOPPED;
	else
	if (thread->state & TH_WAIT)
		state = TH_STATE_WAITING;

	basic_info->run_state = state;
	basic_info->flags = flags;

	basic_info->suspend_count = thread->user_stop_count;

	return;
}
kern_return_t
thread_info_internal(
	thread_t				thread,
	thread_flavor_t			flavor,
	thread_info_t			thread_info_out,	/* ptr to OUT array */
	mach_msg_type_number_t	*thread_info_count)	/*IN/OUT*/
{
	spl_t	s;

	if (thread == THREAD_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (flavor == THREAD_BASIC_INFO) {

		if (*thread_info_count < THREAD_BASIC_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		s = splsched();
		thread_lock(thread);

		retrieve_thread_basic_info(thread, (thread_basic_info_t) thread_info_out);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = THREAD_BASIC_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_IDENTIFIER_INFO) {
		thread_identifier_info_t	identifier_info;

		if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		identifier_info = (thread_identifier_info_t) thread_info_out;

		s = splsched();
		thread_lock(thread);

		identifier_info->thread_id = thread->thread_id;
		identifier_info->thread_handle = thread->machine.cthread_self;
		identifier_info->dispatch_qaddr = thread_dispatchqaddr(thread);

		thread_unlock(thread);
		splx(s);
		return KERN_SUCCESS;
	}
	else
	if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
		policy_timeshare_info_t	ts_info;

		if (*thread_info_count < POLICY_TIMESHARE_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		ts_info = (policy_timeshare_info_t)thread_info_out;

		s = splsched();
		thread_lock(thread);

		if (thread->sched_mode != TH_MODE_TIMESHARE) {
			thread_unlock(thread);
			splx(s);
			return (KERN_INVALID_POLICY);
		}

		ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
		if (ts_info->depressed) {
			ts_info->base_priority = DEPRESSPRI;
			ts_info->depress_priority = thread->base_pri;
		}
		else {
			ts_info->base_priority = thread->base_pri;
			ts_info->depress_priority = -1;
		}

		ts_info->cur_priority = thread->sched_pri;
		ts_info->max_priority = thread->max_priority;

		thread_unlock(thread);
		splx(s);

		*thread_info_count = POLICY_TIMESHARE_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_SCHED_FIFO_INFO) {
		if (*thread_info_count < POLICY_FIFO_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		return (KERN_INVALID_POLICY);
	}
	else
	if (flavor == THREAD_SCHED_RR_INFO) {
		policy_rr_info_t	rr_info;
		uint32_t			quantum_time;
		uint64_t			quantum_ns;

		if (*thread_info_count < POLICY_RR_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		rr_info = (policy_rr_info_t) thread_info_out;

		s = splsched();
		thread_lock(thread);

		if (thread->sched_mode == TH_MODE_TIMESHARE) {
			thread_unlock(thread);
			splx(s);

			return (KERN_INVALID_POLICY);
		}

		rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
		if (rr_info->depressed) {
			rr_info->base_priority = DEPRESSPRI;
			rr_info->depress_priority = thread->base_pri;
		}
		else {
			rr_info->base_priority = thread->base_pri;
			rr_info->depress_priority = -1;
		}

		quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
		absolutetime_to_nanoseconds(quantum_time, &quantum_ns);

		rr_info->max_priority = thread->max_priority;
		rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = POLICY_RR_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_EXTENDED_INFO) {
		thread_basic_info_data_t	basic_info;
		thread_extended_info_t		extended_info = (thread_extended_info_t) thread_info_out;

		if (*thread_info_count < THREAD_EXTENDED_INFO_COUNT) {
			return (KERN_INVALID_ARGUMENT);
		}

		s = splsched();
		thread_lock(thread);

		/* NOTE: This mimics fill_taskthreadinfo(), which is the function used by proc_pidinfo() for
		 * the PROC_PIDTHREADINFO flavor (which can't be used on corpses)
		 */
		retrieve_thread_basic_info(thread, &basic_info);
		extended_info->pth_user_time = ((basic_info.user_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.user_time.microseconds * (integer_t)NSEC_PER_USEC));
		extended_info->pth_system_time = ((basic_info.system_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.system_time.microseconds * (integer_t)NSEC_PER_USEC));

		extended_info->pth_cpu_usage = basic_info.cpu_usage;
		extended_info->pth_policy = basic_info.policy;
		extended_info->pth_run_state = basic_info.run_state;
		extended_info->pth_flags = basic_info.flags;
		extended_info->pth_sleep_time = basic_info.sleep_time;
		extended_info->pth_curpri = thread->sched_pri;
		extended_info->pth_priority = thread->base_pri;
		extended_info->pth_maxpriority = thread->max_priority;

		bsd_getthreadname(thread->uthread, extended_info->pth_name);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = THREAD_EXTENDED_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_DEBUG_INFO_INTERNAL) {
#if DEVELOPMENT || DEBUG
		thread_debug_info_internal_t dbg_info;
		if (*thread_info_count < THREAD_DEBUG_INFO_INTERNAL_COUNT)
			return (KERN_NOT_SUPPORTED);

		if (thread_info_out == NULL)
			return (KERN_INVALID_ARGUMENT);

		dbg_info = (thread_debug_info_internal_t) thread_info_out;
		dbg_info->page_creation_count = thread->t_page_creation_count;

		*thread_info_count = THREAD_DEBUG_INFO_INTERNAL_COUNT;
		return (KERN_SUCCESS);
#endif /* DEVELOPMENT || DEBUG */
		return (KERN_NOT_SUPPORTED);
	}

	return (KERN_INVALID_ARGUMENT);
}
void
thread_read_times(
	thread_t		thread,
	time_value_t	*user_time,
	time_value_t	*system_time)
{
	clock_sec_t		secs;
	clock_usec_t	usecs;
	uint64_t		tval_user, tval_system;

	tval_user = timer_grab(&thread->user_timer);
	tval_system = timer_grab(&thread->system_timer);

	if (thread->precise_user_kernel_time) {
		absolutetime_to_microtime(tval_user, &secs, &usecs);
		user_time->seconds = (typeof(user_time->seconds))secs;
		user_time->microseconds = usecs;

		absolutetime_to_microtime(tval_system, &secs, &usecs);
		system_time->seconds = (typeof(system_time->seconds))secs;
		system_time->microseconds = usecs;
	} else {
		/* system_timer may represent either sys or user */
		tval_user += tval_system;
		absolutetime_to_microtime(tval_user, &secs, &usecs);
		user_time->seconds = (typeof(user_time->seconds))secs;
		user_time->microseconds = usecs;

		system_time->seconds = 0;
		system_time->microseconds = 0;
	}
}
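
/*
 * When precise_user_kernel_time is off, the two timers are not split by
 * privilege level, so the sum is reported entirely as user time and the
 * system component is reported as zero; callers that difference the two
 * values therefore see all CPU time attributed to user mode.
 */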
uint64_t thread_get_runtime_self(void)
{
	boolean_t interrupt_state;
	uint64_t runtime;
	thread_t thread = NULL;
	processor_t processor = NULL;

	thread = current_thread();

	/* Not interrupt safe, as the scheduler may otherwise update timer values underneath us */
	interrupt_state = ml_set_interrupts_enabled(FALSE);
	processor = current_processor();
	timer_switch(PROCESSOR_DATA(processor, thread_timer), mach_absolute_time(), PROCESSOR_DATA(processor, thread_timer));
	runtime = (timer_grab(&thread->user_timer) + timer_grab(&thread->system_timer));
	ml_set_interrupts_enabled(interrupt_state);

	return runtime;
}
kern_return_t
thread_assign(
	__unused thread_t			thread,
	__unused processor_set_t	new_pset)
{
	return (KERN_FAILURE);
}

/*
 *	thread_assign_default:
 *
 *	Special version of thread_assign for assigning threads to default
 *	processor set.
 */
kern_return_t
thread_assign_default(
	thread_t		thread)
{
	return (thread_assign(thread, &pset0));
}

/*
 *	thread_get_assignment
 *
 *	Return current assignment for this thread.
 */
kern_return_t
thread_get_assignment(
	thread_t		thread,
	processor_set_t	*pset)
{
	if (thread == NULL)
		return (KERN_INVALID_ARGUMENT);

	*pset = &pset0;

	return (KERN_SUCCESS);
}
/*
 *	thread_wire_internal:
 *
 *	Specify that the target thread must always be able
 *	to run and to allocate memory.
 */
kern_return_t
thread_wire_internal(
	host_priv_t		host_priv,
	thread_t		thread,
	boolean_t		wired,
	boolean_t		*prev_state)
{
	if (host_priv == NULL || thread != current_thread())
		return (KERN_INVALID_ARGUMENT);

	assert(host_priv == &realhost);

	if (prev_state)
		*prev_state = (thread->options & TH_OPT_VMPRIV) != 0;

	if (wired) {
		if (!(thread->options & TH_OPT_VMPRIV))
			vm_page_free_reserve(1);	/* XXX */
		thread->options |= TH_OPT_VMPRIV;
	}
	else {
		if (thread->options & TH_OPT_VMPRIV)
			vm_page_free_reserve(-1);	/* XXX */
		thread->options &= ~TH_OPT_VMPRIV;
	}

	return (KERN_SUCCESS);
}

/*
 *	thread_wire:
 *
 *	User-api wrapper for thread_wire_internal()
 */
kern_return_t
thread_wire(
	host_priv_t	host_priv,
	thread_t	thread,
	boolean_t	wired)
{
	return (thread_wire_internal(host_priv, thread, wired, NULL));
}
boolean_t
is_vm_privileged(void)
{
	return current_thread()->options & TH_OPT_VMPRIV ? TRUE : FALSE;
}

boolean_t
set_vm_privilege(boolean_t privileged)
{
	boolean_t was_vmpriv;

	if (current_thread()->options & TH_OPT_VMPRIV)
		was_vmpriv = TRUE;
	else
		was_vmpriv = FALSE;

	if (privileged != FALSE)
		current_thread()->options |= TH_OPT_VMPRIV;
	else
		current_thread()->options &= ~TH_OPT_VMPRIV;

	return (was_vmpriv);
}

void
set_thread_rwlock_boost(void)
{
	current_thread()->rwlock_count++;
}

void
clear_thread_rwlock_boost(void)
{
	thread_t thread = current_thread();

	if ((thread->rwlock_count-- == 1) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {

		lck_rw_clear_promotion(thread);
	}
}
/*
 * XXX assuming current thread only, for now...
 */
void
thread_guard_violation(thread_t thread, unsigned type)
{
	assert(thread == current_thread());

	spl_t s = splsched();
	/*
	 *	Use the saved state area of the thread structure
	 *	to store all info required to handle the AST when
	 *	returning to userspace
	 */
	thread->guard_exc_info.type = type;
	thread_ast_set(thread, AST_GUARD);
	ast_propagate(thread->ast);

	splx(s);
}

/*
 *	guard_ast:
 *
 *	Handle AST_GUARD for a thread. This routine looks at the
 *	state saved in the thread structure to determine the cause
 *	of this exception. Based on this value, it invokes the
 *	appropriate routine which determines other exception related
 *	info and raises the exception.
 */
void
guard_ast(thread_t thread)
{
	if (thread->guard_exc_info.type == GUARD_TYPE_MACH_PORT)
		mach_port_guard_ast(thread);
	else
		fd_guard_ast(thread);
}
static void
thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1)
{
	if (warning == LEDGER_WARNING_ROSE_ABOVE) {
#if CONFIG_TELEMETRY
		/*
		 * This thread is in danger of violating the CPU usage monitor. Enable telemetry
		 * on the entire task so there are micro-stackshots available if and when
		 * EXC_RESOURCE is triggered. We could have chosen to enable micro-stackshots
		 * for this thread only; but now that this task is suspect, knowing what all of
		 * its threads are up to will be useful.
		 */
		telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1);
#endif
		return;
	}

#if CONFIG_TELEMETRY
	/*
	 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
	 * exceeded the limit, turn telemetry off for the task.
	 */
	telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0);
#endif

	if (warning == 0) {
		SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU();
	}
}
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void)
{
	int          pid                = 0;
	task_t       task               = current_task();
	thread_t     thread             = current_thread();
	uint64_t     tid                = thread->thread_id;
	const char   *procname          = "unknown";
	time_value_t thread_total_time  = {0, 0};
	time_value_t thread_system_time;
	time_value_t thread_user_time;
	int          action;
	uint8_t      percentage;
	uint32_t     usage_percent = 0;
	uint32_t     interval_sec;
	uint64_t     interval_ns;
	uint64_t     balance_ns;
	boolean_t    fatal = FALSE;
	boolean_t    send_exc_resource = TRUE; /* in addition to RESOURCE_NOTIFY */
	kern_return_t kr;

#ifdef EXC_RESOURCE_MONITORS
	mach_exception_data_type_t	code[EXCEPTION_CODE_MAX];
#endif /* EXC_RESOURCE_MONITORS */
	struct ledger_entry_info	lei;

	assert(thread->t_threadledger != LEDGER_NULL);

	/*
	 * Extract the fatal bit and suspend the monitor (which clears the bit).
	 */
	task_lock(task);
	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
		fatal = TRUE;
		send_exc_resource = TRUE;
	}
	/* Only one thread can be here at a time.  Whichever makes it through
	   first will successfully suspend the monitor and proceed to send the
	   notification.  Other threads will get an error trying to suspend the
	   monitor and give up on sending the notification.  In the first release,
	   the monitor won't be resumed for a number of seconds, but we may
	   eventually need to handle low-latency resume.
	*/
	kr = task_suspend_cpumon(task);
	task_unlock(task);
	if (kr == KERN_INVALID_ARGUMENT)	return;

#ifdef MACH_BSD
	pid = proc_selfpid();
	if (task->bsd_info != NULL) {
		procname = proc_name_address(task->bsd_info);
	}
#endif

	thread_get_cpulimit(&action, &percentage, &interval_ns);

	interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);

	thread_read_times(thread, &thread_user_time, &thread_system_time);
	time_value_add(&thread_total_time, &thread_user_time);
	time_value_add(&thread_total_time, &thread_system_time);
	ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);

	/* credit/debit/balance/limit are in absolute time units;
	   the refill info is in nanoseconds. */
	absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
	if (lei.lei_last_refill > 0) {
		usage_percent = (uint32_t)((balance_ns * 100ULL) / lei.lei_last_refill);
	}
2118 /* TODO: show task total runtime (via TASK_ABSOLUTETIME_INFO)? */
2119 printf("process %s[%d] thread %llu caught burning CPU! "
2120 "It used more than %d%% CPU over %u seconds "
2121 "(actual recent usage: %d%% over ~%llu seconds). "
2122 "Thread lifetime cpu usage %d.%06ds, (%d.%06d user, %d.%06d sys) "
2123 "ledger balance: %lld mabs credit: %lld mabs debit: %lld mabs "
2124 "limit: %llu mabs period: %llu ns last refill: %llu ns%s.\n",
2126 percentage
, interval_sec
,
2128 (lei
.lei_last_refill
+ NSEC_PER_SEC
/2) / NSEC_PER_SEC
,
2129 thread_total_time
.seconds
, thread_total_time
.microseconds
,
2130 thread_user_time
.seconds
, thread_user_time
.microseconds
,
2131 thread_system_time
.seconds
,thread_system_time
.microseconds
,
2132 lei
.lei_balance
, lei
.lei_credit
, lei
.lei_debit
,
2133 lei
.lei_limit
, lei
.lei_refill_period
, lei
.lei_last_refill
,
2134 (fatal
? " [fatal violation]" : ""));
2137 For now, send RESOURCE_NOTIFY in parallel with EXC_RESOURCE. Once
2138 we have logging parity, we will stop sending EXC_RESOURCE (24508922).
2141 /* RESOURCE_NOTIFY MIG specifies nanoseconds of CPU time */
2142 lei
.lei_balance
= balance_ns
;
2143 absolutetime_to_nanoseconds(lei
.lei_limit
, &lei
.lei_limit
);
2144 trace_resource_violation(RMON_CPUUSAGE_VIOLATED
, &lei
);
2145 kr
= send_resource_violation(send_cpu_usage_violation
, task
, &lei
,
2146 fatal
? kRNFatalLimitFlag
: 0);
2148 printf("send_resource_violation(CPU usage, ...): error %#x\n", kr
);
2151 #ifdef EXC_RESOURCE_MONITORS
2152 if (send_exc_resource
) {
2153 if (disable_exc_resource
) {
2154 printf("process %s[%d] thread %llu caught burning CPU! "
2155 "EXC_RESOURCE%s supressed by a boot-arg\n",
2156 procname
, pid
, tid
, fatal
? " (and termination)" : "");
2161 printf("process %s[%d] thread %llu caught burning CPU! "
2162 "EXC_RESOURCE & termination supressed due to audio playback\n",
2163 procname
, pid
, tid
);
	if (send_exc_resource) {
		code[0] = code[1] = 0;
		EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU);
		if (fatal) {
			EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR_FATAL);
		} else {
			EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR);
		}
		EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec);
		EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], percentage);
		EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent);
		exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
	}
#endif /* EXC_RESOURCE_MONITORS */

	if (fatal) {
#if CONFIG_JETSAM
		jetsam_on_ledger_cpulimit_exceeded();
#else
		task_terminate_internal(task);
#endif
	}
}
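
/*
 * Illustrative sketch (not part of the original file, compiled out): how an
 * EXC_RESOURCE catcher might unpack the two code words encoded above.  This
 * assumes the EXC_RESOURCE_DECODE_* and EXC_RESOURCE_CPUMONITOR_DECODE_*
 * macros from <kern/exc_resource.h>; the function name
 * example_decode_cpumon_codes is hypothetical.
 */
#if 0
static void
example_decode_cpumon_codes(mach_exception_data_type_t code0,
			    mach_exception_data_type_t code1)
{
	/* code[0] carries resource type, flavor, interval and configured limit */
	int type     = (int)EXC_RESOURCE_DECODE_RESOURCE_TYPE(code0);
	int flavor   = (int)EXC_RESOURCE_DECODE_FLAVOR(code0);
	int interval = (int)EXC_RESOURCE_CPUMONITOR_DECODE_INTERVAL(code0);
	int limit    = (int)EXC_RESOURCE_CPUMONITOR_DECODE_PERCENTAGE(code0);
	/* code[1] carries the observed usage percentage */
	int observed = (int)EXC_RESOURCE_CPUMONITOR_DECODE_PERCENTAGE(code1);

	printf("resource type %d flavor %d: observed %d%% vs. limit %d%% over %ds\n",
	       type, flavor, observed, limit, interval);
}
#endif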
void thread_update_io_stats(thread_t thread, int size, int io_flags)
{
	int io_tier;

	if (thread->thread_io_stats == NULL || thread->task->task_io_stats == NULL)
		return;

	if (io_flags & DKIO_READ) {
		UPDATE_IO_STATS(thread->thread_io_stats->disk_reads, size);
		UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->disk_reads, size);
	}

	if (io_flags & DKIO_META) {
		UPDATE_IO_STATS(thread->thread_io_stats->metadata, size);
		UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->metadata, size);
	}

	if (io_flags & DKIO_PAGING) {
		UPDATE_IO_STATS(thread->thread_io_stats->paging, size);
		UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->paging, size);
	}

	io_tier = ((io_flags & DKIO_TIER_MASK) >> DKIO_TIER_SHIFT);
	assert(io_tier < IO_NUM_PRIORITIES);

	UPDATE_IO_STATS(thread->thread_io_stats->io_priority[io_tier], size);
	UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->io_priority[io_tier], size);

	/* Update Total I/O Counts */
	UPDATE_IO_STATS(thread->thread_io_stats->total_io, size);
	UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->total_io, size);

	if (!(io_flags & DKIO_READ)) {
		DTRACE_IO3(physical_writes, struct task *, thread->task, uint32_t, size, int, io_flags);
		ledger_credit(thread->task->ledger, task_ledgers.physical_writes, size);
	}
}
static void
init_thread_ledgers(void) {
	ledger_template_t t;
	int idx;

	assert(thread_ledger_template == NULL);

	if ((t = ledger_template_create("Per-thread ledger")) == NULL)
		panic("couldn't create thread ledger template");

	if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
		panic("couldn't create cpu_time entry for thread ledger template");
	}

	if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) {
		panic("couldn't set thread ledger callback for cpu_time entry");
	}

	thread_ledgers.cpu_time = idx;

	thread_ledger_template = t;
}
/*
 * Returns currently applied CPU usage limit, or 0/0 if none is applied.
 */
int
thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns)
{
	int64_t		abstime = 0;
	uint64_t	limittime = 0;
	thread_t	thread = current_thread();

	*percentage  = 0;
	*interval_ns = 0;
	*action      = 0;

	if (thread->t_threadledger == LEDGER_NULL) {
		/*
		 * This thread has no per-thread ledger, so it can't possibly
		 * have a CPU limit applied.
		 */
		return (KERN_SUCCESS);
	}

	ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns);
	ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime);

	if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) {
		/*
		 * This thread's CPU time ledger has no period or limit; so it
		 * doesn't have a CPU limit applied.
		 */
		return (KERN_SUCCESS);
	}

	/*
	 * This calculation is the converse to the one in thread_set_cpulimit().
	 */
	absolutetime_to_nanoseconds(abstime, &limittime);
	*percentage = (limittime * 100ULL) / *interval_ns;
	assert(*percentage <= 100);

	if (thread->options & TH_OPT_PROC_CPULIMIT) {
		assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0);

		*action = THREAD_CPULIMIT_BLOCK;
	} else if (thread->options & TH_OPT_PRVT_CPULIMIT) {
		assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0);

		*action = THREAD_CPULIMIT_EXCEPTION;
	} else {
		*action = THREAD_CPULIMIT_DISABLE;
	}

	return (KERN_SUCCESS);
}
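
/*
 * Worked example of the conversion above (added for illustration): with a
 * ledger limit equivalent to 2.5s of CPU time and a 5s refill period,
 * limittime is 2,500,000,000 ns and *interval_ns is 5,000,000,000 ns, so the
 * reported percentage is (2,500,000,000 * 100) / 5,000,000,000 = 50.
 */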
/*
 * Set CPU usage limit on a thread.
 *
 * Calling with percentage of 0 will unset the limit for this thread.
 */
int
thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
{
	thread_t	thread = current_thread();
	ledger_t	l;
	uint64_t	limittime = 0;
	uint64_t	abstime = 0;

	assert(percentage <= 100);

	if (action == THREAD_CPULIMIT_DISABLE) {
		/*
		 * Remove CPU limit, if any exists.
		 */
		if (thread->t_threadledger != LEDGER_NULL) {
			l = thread->t_threadledger;
			ledger_set_limit(l, thread_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
			ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_IGNORE);
			thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT);
		}

		return (0);
	}

	if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) {
		return (KERN_INVALID_ARGUMENT);
	}

	l = thread->t_threadledger;
	if (l == LEDGER_NULL) {
		/*
		 * This thread doesn't yet have a per-thread ledger; so create one with the CPU time entry active.
		 */
		if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL)
			return (KERN_RESOURCE_SHORTAGE);

		/*
		 * We are the first to create this thread's ledger, so only activate our entry.
		 */
		ledger_entry_setactive(l, thread_ledgers.cpu_time);
		thread->t_threadledger = l;
	}

	/*
	 * The limit is specified as a percentage of CPU over an interval in nanoseconds.
	 * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
	 */
	limittime = (interval_ns * percentage) / 100;
	nanoseconds_to_absolutetime(limittime, &abstime);
	ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct);
	/*
	 * Refill the thread's allotted CPU time every interval_ns nanoseconds.
	 */
	ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);

	if (action == THREAD_CPULIMIT_EXCEPTION) {
		/*
		 * We don't support programming the CPU usage monitor on a task if any of its
		 * threads have a per-thread blocking CPU limit configured.
		 */
		if (thread->options & TH_OPT_PRVT_CPULIMIT) {
			panic("CPU usage monitor activated, but blocking thread limit exists");
		}

		/*
		 * Make a note that this thread's CPU limit is being used for the task-wide CPU
		 * usage monitor.  We don't have to arm the callback which will trigger the
		 * exception, because that was done for us in ledger_instantiate (because the
		 * ledger template used has a default callback).
		 */
		thread->options |= TH_OPT_PROC_CPULIMIT;
	} else {
		/*
		 * We deliberately override any CPU limit imposed by a task-wide limit (eg
		 * CPU usage monitor).
		 */
		thread->options &= ~TH_OPT_PROC_CPULIMIT;

		thread->options |= TH_OPT_PRVT_CPULIMIT;
		/* The per-thread ledger template by default has a callback for CPU time */
		ledger_disable_callback(l, thread_ledgers.cpu_time);
		ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
	}

	return (0);
}
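
/*
 * Illustrative sketch (not part of the original file, compiled out): arming
 * a blocking 50%-over-5-seconds limit on the current thread via the routine
 * above, then removing it.  example_throttle_self is a hypothetical caller.
 */
#if 0
static void
example_throttle_self(void)
{
	/* block the thread once it consumes 2.5s of CPU per 5s window */
	thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, 50, 5 * NSEC_PER_SEC);

	/* ... do throttled work ... */

	/* remove the limit; percentage and interval are ignored for DISABLE */
	thread_set_cpulimit(THREAD_CPULIMIT_DISABLE, 0, 0);
}
#endif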
static void
sched_call_null(
__unused	int			type,
__unused	thread_t	thread)
{
	return;
}

void
thread_sched_call(
	thread_t		thread,
	sched_call_t	call)
{
	thread->sched_call = (call != NULL)? call: sched_call_null;
}
void
thread_disable_sched_call(
	thread_t		thread,
	sched_call_t	call)
{
	if (call) {
		spl_t	s = splsched();
		thread_lock(thread);
		if (thread->sched_call == call) {
			thread->sched_call = sched_call_null;
		}
		thread_unlock(thread);
		splx(s);
	}
}

void
thread_reenable_sched_call(
	thread_t		thread,
	sched_call_t	call)
{
	if (call) {
		spl_t	s = splsched();
		thread_lock(thread);
		thread_sched_call(thread, call);
		thread_unlock(thread);
		splx(s);
	}
}
void
thread_static_param(
	thread_t		thread,
	boolean_t		state)
{
	thread_mtx_lock(thread);
	thread->static_param = state;
	thread_mtx_unlock(thread);
}

uint64_t
thread_tid(
	thread_t	thread)
{
	return (thread != THREAD_NULL? thread->thread_id: 0);
}
uint16_t	thread_set_tag(thread_t th, uint16_t tag) {
	return thread_set_tag_internal(th, tag);
}

uint16_t	thread_get_tag(thread_t th) {
	return thread_get_tag_internal(th);
}
uint64_t
thread_dispatchqaddr(
	thread_t		thread)
{
	uint64_t	dispatchqueue_addr;
	uint64_t	thread_handle;

	if (thread == THREAD_NULL)
		return 0;

	thread_handle = thread->machine.cthread_self;
	if (thread_handle == 0)
		return 0;

	if (thread->inspection == TRUE)
		dispatchqueue_addr = thread_handle + get_task_dispatchqueue_offset(thread->task);
	else if (thread->task->bsd_info)
		dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
	else
		dispatchqueue_addr = 0;

	return dispatchqueue_addr;
}
/*
 * Export routines to other components for things that are done as macros
 * within the osfmk component.
 */

#undef thread_reference
void thread_reference(thread_t thread);
void
thread_reference(
	thread_t	thread)
{
	if (thread != THREAD_NULL)
		thread_reference_internal(thread);
}

#undef thread_should_halt

boolean_t
thread_should_halt(
	thread_t		th)
{
	return (thread_should_halt_fast(th));
}
/*
 *  thread_set_voucher_name - reset the voucher port name bound to this thread
 *
 *  Conditions:  nothing locked
 *
 *	If we already converted the previous name to a cached voucher
 *	reference, then we discard that reference here.  The next lookup
 *	will cache it again.
 */
kern_return_t
thread_set_voucher_name(mach_port_name_t voucher_name)
{
	thread_t thread = current_thread();
	ipc_voucher_t new_voucher = IPC_VOUCHER_NULL;
	ipc_voucher_t voucher;
	ledger_t bankledger = NULL;

	if (MACH_PORT_DEAD == voucher_name)
		return KERN_INVALID_RIGHT;

	/*
	 * aggressively convert to voucher reference
	 */
	if (MACH_PORT_VALID(voucher_name)) {
		new_voucher = convert_port_name_to_voucher(voucher_name);
		if (IPC_VOUCHER_NULL == new_voucher)
			return KERN_INVALID_ARGUMENT;
	}
	bankledger = bank_get_voucher_ledger(new_voucher);

	thread_mtx_lock(thread);
	voucher = thread->ith_voucher;
	thread->ith_voucher_name = voucher_name;
	thread->ith_voucher = new_voucher;
	bank_swap_thread_bank_ledger(thread, bankledger);
	thread_mtx_unlock(thread);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				  MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
				  (uintptr_t)thread_tid(thread),
				  (uintptr_t)voucher_name,
				  VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
				  1, 0);

	if (IPC_VOUCHER_NULL != voucher)
		ipc_voucher_release(voucher);

	return KERN_SUCCESS;
}
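
/*
 * Usage note (added for illustration): passing MACH_PORT_NULL is accepted by
 * the routine above.  It fails the MACH_PORT_VALID() test, so no new voucher
 * is looked up and the thread's binding is simply cleared; only
 * MACH_PORT_DEAD is rejected outright with KERN_INVALID_RIGHT.
 */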
/*
 *  thread_get_mach_voucher - return a voucher reference for the specified thread voucher
 *
 *  Conditions:  nothing locked
 *
 *  A reference to the voucher may be lazily pending, if someone set the voucher name
 *  but nobody has done a lookup yet.  In that case, we'll have to do the equivalent
 *  lookup here.
 *
 *  NOTE:  At the moment, there is no distinction between the current and effective
 *  vouchers because we only set them at the thread level currently.
 */
kern_return_t
thread_get_mach_voucher(
	thread_act_t		thread,
	mach_voucher_selector_t __unused which,
	ipc_voucher_t		*voucherp)
{
	ipc_voucher_t		voucher;
	mach_port_name_t	voucher_name;

	if (THREAD_NULL == thread)
		return KERN_INVALID_ARGUMENT;

	thread_mtx_lock(thread);
	voucher = thread->ith_voucher;

	/* if already cached, just return a ref */
	if (IPC_VOUCHER_NULL != voucher) {
		ipc_voucher_reference(voucher);
		thread_mtx_unlock(thread);
		*voucherp = voucher;
		return KERN_SUCCESS;
	}

	voucher_name = thread->ith_voucher_name;

	/* convert the name to a port, then voucher reference */
	if (MACH_PORT_VALID(voucher_name)) {
		ipc_port_t port;

		if (KERN_SUCCESS !=
		    ipc_object_copyin(thread->task->itk_space, voucher_name,
				      MACH_MSG_TYPE_COPY_SEND, (ipc_object_t *)&port)) {
			thread->ith_voucher_name = MACH_PORT_NULL;
			thread_mtx_unlock(thread);
			*voucherp = IPC_VOUCHER_NULL;
			return KERN_SUCCESS;
		}

		/* convert to a voucher ref to return, and cache a ref on thread */
		voucher = convert_port_to_voucher(port);
		ipc_voucher_reference(voucher);
		thread->ith_voucher = voucher;
		thread_mtx_unlock(thread);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
					  MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
					  (uintptr_t)thread_tid(thread),
					  (uintptr_t)voucher_name,
					  VM_KERNEL_ADDRPERM((uintptr_t)voucher),
					  2, 0);

		ipc_port_release_send(port);
	} else
		thread_mtx_unlock(thread);

	*voucherp = voucher;
	return KERN_SUCCESS;
}
/*
 *  thread_set_mach_voucher - set a voucher reference for the specified thread voucher
 *
 *  Conditions:  caller holds a reference on the voucher.
 *		nothing locked.
 *
 *  We grab another reference to the voucher and bind it to the thread.  Any lazy
 *  binding is erased.  The old voucher reference associated with the thread is
 *  discarded.
 */
kern_return_t
thread_set_mach_voucher(
	thread_t		thread,
	ipc_voucher_t		voucher)
{
	ipc_voucher_t old_voucher;
	ledger_t bankledger = NULL;

	if (THREAD_NULL == thread)
		return KERN_INVALID_ARGUMENT;

	if (thread != current_thread() || thread->started)
		return KERN_INVALID_ARGUMENT;

	ipc_voucher_reference(voucher);
	bankledger = bank_get_voucher_ledger(voucher);

	thread_mtx_lock(thread);
	old_voucher = thread->ith_voucher;
	thread->ith_voucher = voucher;
	thread->ith_voucher_name = MACH_PORT_NULL;
	bank_swap_thread_bank_ledger(thread, bankledger);
	thread_mtx_unlock(thread);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				  MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
				  (uintptr_t)thread_tid(thread),
				  (uintptr_t)MACH_PORT_NULL,
				  VM_KERNEL_ADDRPERM((uintptr_t)voucher),
				  3, 0);

	ipc_voucher_release(old_voucher);

	return KERN_SUCCESS;
}
/*
 *  thread_swap_mach_voucher - swap a voucher reference for the specified thread voucher
 *
 *  Conditions:  caller holds a reference on the new and presumed old voucher(s).
 *		nothing locked.
 *
 *  If the old voucher is still the same as passed in, replace it with new voucher
 *  and discard the old (and the reference passed in).  Otherwise, discard the new
 *  and return an updated old voucher.
 */
kern_return_t
thread_swap_mach_voucher(
	thread_t		thread,
	ipc_voucher_t		new_voucher,
	ipc_voucher_t		*in_out_old_voucher)
{
	mach_port_name_t old_voucher_name;
	ipc_voucher_t old_voucher;
	ledger_t bankledger = NULL;

	if (THREAD_NULL == thread)
		return KERN_INVALID_TASK;

	if (thread != current_thread() || thread->started)
		return KERN_INVALID_ARGUMENT;

	bankledger = bank_get_voucher_ledger(new_voucher);

	thread_mtx_lock(thread);

	old_voucher = thread->ith_voucher;

	if (IPC_VOUCHER_NULL == old_voucher) {
		old_voucher_name = thread->ith_voucher_name;

		/* perform lazy binding if needed */
		if (MACH_PORT_VALID(old_voucher_name)) {
			old_voucher = convert_port_name_to_voucher(old_voucher_name);
			thread->ith_voucher_name = MACH_PORT_NULL;
			thread->ith_voucher = old_voucher;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
						  MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
						  (uintptr_t)thread_tid(thread),
						  (uintptr_t)old_voucher_name,
						  VM_KERNEL_ADDRPERM((uintptr_t)old_voucher),
						  4, 0);
		}
	}

	/* swap in new voucher, if old voucher matches the one supplied */
	if (old_voucher == *in_out_old_voucher) {
		ipc_voucher_reference(new_voucher);
		thread->ith_voucher = new_voucher;
		thread->ith_voucher_name = MACH_PORT_NULL;
		bank_swap_thread_bank_ledger(thread, bankledger);
		thread_mtx_unlock(thread);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
					  MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
					  (uintptr_t)thread_tid(thread),
					  (uintptr_t)MACH_PORT_NULL,
					  VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
					  5, 0);

		ipc_voucher_release(old_voucher);

		*in_out_old_voucher = IPC_VOUCHER_NULL;
		return KERN_SUCCESS;
	}

	/* Otherwise, just return old voucher reference */
	ipc_voucher_reference(old_voucher);
	thread_mtx_unlock(thread);
	*in_out_old_voucher = old_voucher;
	return KERN_SUCCESS;
}
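
/*
 * Illustrative sketch (not part of the original file, compiled out): the
 * routine above has compare-and-swap semantics, so a hypothetical caller can
 * inspect the updated "old" voucher it hands back on a mismatch.  The name
 * example_swap_voucher is assumed.
 */
#if 0
static kern_return_t
example_swap_voucher(thread_t thread, ipc_voucher_t new_voucher)
{
	ipc_voucher_t old = IPC_VOUCHER_NULL;	/* expect no current binding */
	kern_return_t kr;

	kr = thread_swap_mach_voucher(thread, new_voucher, &old);
	/* on a mismatch, 'old' returns a reference to the current voucher */
	if (kr == KERN_SUCCESS && old != IPC_VOUCHER_NULL)
		ipc_voucher_release(old);
	return kr;
}
#endif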
/*
 *  thread_get_current_voucher_origin_pid - get the pid of the originator of the current voucher.
 */
kern_return_t
thread_get_current_voucher_origin_pid(
	int32_t      *pid)
{
	uint32_t buf_size;
	kern_return_t kr;
	thread_t thread = current_thread();

	buf_size = sizeof(*pid);
	kr = mach_voucher_attr_command(thread->ith_voucher,
		MACH_VOUCHER_ATTR_KEY_BANK,
		BANK_ORIGINATOR_PID,
		NULL,
		0,
		(mach_voucher_attr_content_t)pid,
		&buf_size);

	return kr;
}
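
/*
 * Illustrative sketch (not part of the original file, compiled out):
 * querying the originator pid of the calling thread's voucher via the
 * routine above.  example_log_voucher_origin is a hypothetical caller.
 */
#if 0
static void
example_log_voucher_origin(void)
{
	int32_t pid = -1;

	if (thread_get_current_voucher_origin_pid(&pid) == KERN_SUCCESS)
		printf("current voucher originated from pid %d\n", pid);
}
#endif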
boolean_t
thread_has_thread_name(thread_t th)
{
	if ((th) && (th->uthread)) {
		return bsd_hasthreadname(th->uthread);
	}

	/*
	 * This is an odd case; clients may set the thread name based on the lack of
	 * a name, but in this context there is no uthread to attach the name to.
	 */
	return FALSE;
}

void
thread_set_thread_name(thread_t th, const char* name)
{
	if ((th) && (th->uthread) && name) {
		bsd_setthreadname(th->uthread, name);
	}
}
/*
 * thread_enable_send_importance - set/clear the SEND_IMPORTANCE thread option bit.
 */
void thread_enable_send_importance(thread_t thread, boolean_t enable)
{
	if (enable == TRUE)
		thread->options |= TH_OPT_SEND_IMPORTANCE;
	else
		thread->options &= ~TH_OPT_SEND_IMPORTANCE;
}
#if CONFIG_DTRACE
uint32_t dtrace_get_thread_predcache(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_predcache;
	else
		return 0;
}

int64_t dtrace_get_thread_vtime(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_vtime;
	else
		return 0;
}

int dtrace_get_thread_last_cpu_id(thread_t thread)
{
	if ((thread != THREAD_NULL) && (thread->last_processor != PROCESSOR_NULL)) {
		return thread->last_processor->cpu_id;
	} else {
		return -1;
	}
}

int64_t dtrace_get_thread_tracing(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_tracing;
	else
		return 0;
}
boolean_t dtrace_get_thread_reentering(thread_t thread)
{
	if (thread != THREAD_NULL)
		return (thread->options & TH_OPT_DTRACE) ? TRUE : FALSE;
	else
		return FALSE;
}

vm_offset_t dtrace_get_kernel_stack(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->kernel_stack;
	else
		return 0;
}
int64_t dtrace_calc_thread_recent_vtime(thread_t thread)
{
	if (thread != THREAD_NULL) {
		processor_t	processor = current_processor();
		uint64_t	abstime = mach_absolute_time();
		timer_t		timer;

		timer = PROCESSOR_DATA(processor, thread_timer);

		return timer_grab(&(thread->system_timer)) + timer_grab(&(thread->user_timer)) +
				(abstime - timer->tstamp); /* XXX need interrupts off to prevent missed time? */
	} else
		return 0;
}
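
/*
 * Note (added for illustration): the value above is the thread's banked
 * user + system time plus the still-unbanked slice since the processor's
 * thread_timer was last stamped, i.e. CPU time "as of now" rather than as of
 * the last context switch.
 */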
void dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_predcache = predcache;
}

void dtrace_set_thread_vtime(thread_t thread, int64_t vtime)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_vtime = vtime;
}

void dtrace_set_thread_tracing(thread_t thread, int64_t accum)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_tracing = accum;
}

void dtrace_set_thread_reentering(thread_t thread, boolean_t vbool)
{
	if (thread != THREAD_NULL) {
		if (vbool)
			thread->options |= TH_OPT_DTRACE;
		else
			thread->options &= (~TH_OPT_DTRACE);
	}
}

vm_offset_t dtrace_set_thread_recover(thread_t thread, vm_offset_t recover)
{
	vm_offset_t prev = 0;

	if (thread != THREAD_NULL) {
		prev = thread->recover;
		thread->recover = recover;
	}
	return prev;
}
void dtrace_thread_bootstrap(void)
{
	task_t task = current_task();

	if (task->thread_count == 1) {
		thread_t thread = current_thread();
		if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) {
			thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS;
			DTRACE_PROC(exec__success);
			KDBG(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXEC),
			     task_pid(task));
		}
		DTRACE_PROC(start);
	}
	DTRACE_PROC(lwp__start);
}

void
dtrace_thread_didexec(thread_t thread)
{
	thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS;
}
#endif /* CONFIG_DTRACE */