/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_FREE_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young, David Golub
 *
 *	Thread management primitives implementation.
 */
/*
 * Copyright (c) 1993 The University of Utah and
 * the Computer Systems Laboratory (CSL).  All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
 * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
 * improvements that they make and grant CSL redistribution rights.
 */
#include <mach/mach_types.h>
#include <mach/boolean.h>
#include <mach/policy.h>
#include <mach/thread_info.h>
#include <mach/thread_special_ports.h>
#include <mach/thread_status.h>
#include <mach/time_value.h>
#include <mach/vm_param.h>

#include <machine/thread.h>
#include <machine/pal_routines.h>
#include <machine/limits.h>

#include <kern/kern_types.h>
#include <kern/kalloc.h>
#include <kern/cpu_data.h>
#include <kern/counters.h>
#include <kern/extmod_statistics.h>
#include <kern/ipc_mig.h>
#include <kern/ipc_tt.h>
#include <kern/mach_param.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/sync_lock.h>
#include <kern/syscall_subr.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/host.h>
#include <kern/zalloc.h>
#include <kern/assert.h>
#include <kern/exc_resource.h>
#include <kern/telemetry.h>
#include <kern/policy_internal.h>

#include <corpses/task_corpse.h>

#include <kern/kpc.h>

#include <ipc/ipc_kmsg.h>
#include <ipc/ipc_port.h>
#include <bank/bank_types.h>

#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <sys/kdebug.h>
#include <sys/bsdtask_info.h>
#include <mach/sdt.h>

/*
 * Exported interfaces
 */
#include <mach/task_server.h>
#include <mach/thread_act_server.h>
#include <mach/mach_host_server.h>
#include <mach/host_priv_server.h>
#include <mach/mach_voucher_server.h>
#include <kern/policy_internal.h>
static struct zone		*thread_zone;
static lck_grp_attr_t		thread_lck_grp_attr;
lck_attr_t			thread_lck_attr;
lck_grp_t			thread_lck_grp;

struct zone			*thread_qos_override_zone;

decl_simple_lock_data(static, thread_stack_lock)
static queue_head_t		thread_stack_queue;

decl_simple_lock_data(static, thread_terminate_lock)
static queue_head_t		thread_terminate_queue;

static queue_head_t		crashed_threads_queue;

decl_simple_lock_data(static, thread_exception_lock)
static queue_head_t		thread_exception_queue;

struct thread_exception_elt {
	task_t		exception_task;
	thread_t	exception_thread;
};

static struct thread	thread_template, init_thread;

static void		sched_call_null(
				int		type,
				thread_t	thread);

#ifdef MACH_BSD
extern void proc_exit(void *);
extern mach_exception_data_type_t proc_encode_exit_exception_code(void *);
extern uint64_t get_dispatchqueue_offset_from_proc(void *);
extern int proc_selfpid(void);
extern char *proc_name_address(void *p);
#endif /* MACH_BSD */

extern int disable_exc_resource;
extern int audio_active;
extern int debug_task;
int thread_max = CONFIG_THREAD_MAX;	/* Max number of threads */
int task_threadmax = CONFIG_THREAD_MAX;

static uint64_t		thread_unique_id = 100;

struct _thread_ledger_indices thread_ledgers = { -1 };
static ledger_template_t thread_ledger_template = NULL;
static void init_thread_ledgers(void);

void jetsam_on_ledger_cpulimit_exceeded(void);

/*
 * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry.
 *
 * (i.e. when any thread's CPU consumption exceeds 70% of the limit, start taking user
 *  stacktraces, aka micro-stackshots)
 */
#define	CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70

int cpumon_ustackshots_trigger_pct;	/* Percentage. Level at which we start gathering telemetry. */

void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void);

/*
 * The smallest interval over which we support limiting CPU consumption is 1ms
 */
#define MINIMUM_CPULIMIT_INTERVAL_MS 1
void
thread_bootstrap(void)
{
	/*
	 *	Fill in a template thread for fast initialization.
	 */

#if MACH_ASSERT
	thread_template.thread_magic = THREAD_MAGIC;
#endif /* MACH_ASSERT */

	thread_template.runq = PROCESSOR_NULL;

	thread_template.ref_count = 2;

	thread_template.reason = AST_NONE;
	thread_template.at_safe_point = FALSE;
	thread_template.wait_event = NO_EVENT64;
	thread_template.waitq = NULL;
	thread_template.wait_result = THREAD_WAITING;
	thread_template.options = THREAD_ABORTSAFE;
	thread_template.state = TH_WAIT | TH_UNINT;
	thread_template.wake_active = FALSE;
	thread_template.continuation = THREAD_CONTINUE_NULL;
	thread_template.parameter = NULL;

	thread_template.importance = 0;
	thread_template.sched_mode = TH_MODE_NONE;
	thread_template.sched_flags = 0;
	thread_template.saved_mode = TH_MODE_NONE;
	thread_template.safe_release = 0;
	thread_template.th_sched_bucket = TH_BUCKET_RUN;

	thread_template.sfi_class = SFI_CLASS_UNSPECIFIED;
	thread_template.sfi_wait_class = SFI_CLASS_UNSPECIFIED;

	thread_template.active = 0;
	thread_template.started = 0;
	thread_template.static_param = 0;
	thread_template.policy_reset = 0;

	thread_template.base_pri = BASEPRI_DEFAULT;
	thread_template.sched_pri = 0;
	thread_template.max_priority = 0;
	thread_template.task_priority = 0;
	thread_template.promotions = 0;
	thread_template.pending_promoter_index = 0;
	thread_template.pending_promoter[0] = NULL;
	thread_template.pending_promoter[1] = NULL;
	thread_template.rwlock_count = 0;

	thread_template.realtime.deadline = UINT64_MAX;

	thread_template.quantum_remaining = 0;
	thread_template.last_run_time = 0;
	thread_template.last_made_runnable_time = 0;

	thread_template.computation_metered = 0;
	thread_template.computation_epoch = 0;

#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	thread_template.sched_stamp = 0;
	thread_template.pri_shift = INT8_MAX;
	thread_template.sched_usage = 0;
	thread_template.cpu_usage = thread_template.cpu_delta = 0;
#endif
	thread_template.c_switch = thread_template.p_switch = thread_template.ps_switch = 0;

	thread_template.bound_processor = PROCESSOR_NULL;
	thread_template.last_processor = PROCESSOR_NULL;

	thread_template.sched_call = sched_call_null;

	timer_init(&thread_template.user_timer);
	timer_init(&thread_template.system_timer);
	thread_template.user_timer_save = 0;
	thread_template.system_timer_save = 0;
	thread_template.vtimer_user_save = 0;
	thread_template.vtimer_prof_save = 0;
	thread_template.vtimer_rlim_save = 0;
	thread_template.vtimer_qos_save = 0;

#if CONFIG_SCHED_SFI
	thread_template.wait_sfi_begin_time = 0;
#endif

	thread_template.wait_timer_is_set = FALSE;
	thread_template.wait_timer_active = 0;

	thread_template.depress_timer_active = 0;

	thread_template.recover = (vm_offset_t)NULL;

	thread_template.map = VM_MAP_NULL;

#if CONFIG_DTRACE
	thread_template.t_dtrace_predcache = 0;
	thread_template.t_dtrace_vtime = 0;
	thread_template.t_dtrace_tracing = 0;
#endif /* CONFIG_DTRACE */

#if KPERF
	thread_template.kperf_flags = 0;
	thread_template.kperf_pet_gen = 0;
	thread_template.kperf_c_switch = 0;
	thread_template.kperf_pet_cnt = 0;
#endif

#if KPC
	thread_template.kpc_buf = NULL;
#endif

#if HYPERVISOR
	thread_template.hv_thread_target = NULL;
#endif /* HYPERVISOR */

#if (DEVELOPMENT || DEBUG)
	thread_template.t_page_creation_throttled_hard = 0;
	thread_template.t_page_creation_throttled_soft = 0;
#endif /* DEVELOPMENT || DEBUG */
	thread_template.t_page_creation_throttled = 0;
	thread_template.t_page_creation_count = 0;
	thread_template.t_page_creation_time = 0;

	thread_template.affinity_set = NULL;

	thread_template.syscalls_unix = 0;
	thread_template.syscalls_mach = 0;

	thread_template.t_ledger = LEDGER_NULL;
	thread_template.t_threadledger = LEDGER_NULL;

	thread_template.t_bankledger = LEDGER_NULL;
	thread_template.t_deduct_bank_ledger_time = 0;

	thread_template.requested_policy = (struct thread_requested_policy) {};
	thread_template.effective_policy = (struct thread_effective_policy) {};

	bzero(&thread_template.overrides, sizeof(thread_template.overrides));

	thread_template.iotier_override = THROTTLE_LEVEL_NONE;
	thread_template.thread_io_stats = NULL;
	thread_template.thread_callout_interrupt_wakeups = thread_template.thread_callout_platform_idle_wakeups = 0;

	thread_template.thread_timer_wakeups_bin_1 = thread_template.thread_timer_wakeups_bin_2 = 0;
	thread_template.callout_woken_from_icontext = thread_template.callout_woken_from_platform_idle = 0;

	thread_template.thread_tag = 0;

	thread_template.ith_voucher_name = MACH_PORT_NULL;
	thread_template.ith_voucher = IPC_VOUCHER_NULL;

	thread_template.work_interval_id = 0;

	init_thread = thread_template;
	machine_set_current_thread(&init_thread);
}
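/*
 * Illustrative note (not from the original source): the template built above
 * is consumed by thread_create_internal() below with a plain struct copy,
 * which is why thread_bootstrap() only has to run once at boot:
 *
 *	new_thread = (thread_t)zalloc(thread_zone);
 *	*new_thread = thread_template;	// every field gets a sane default
 *
 * Any field not explicitly overwritten afterwards keeps the value set here.
 */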
extern boolean_t allow_qos_policy_set;

void
thread_init(void)
{
	thread_zone = zinit(
			sizeof(struct thread),
			thread_max * sizeof(struct thread),
			THREAD_CHUNK * sizeof(struct thread),
			"threads");

	thread_qos_override_zone = zinit(
		sizeof(struct thread_qos_override),
		4 * thread_max * sizeof(struct thread_qos_override),
		PAGE_SIZE,
		"thread qos override");
	zone_change(thread_qos_override_zone, Z_EXPAND, TRUE);
	zone_change(thread_qos_override_zone, Z_COLLECT, TRUE);
	zone_change(thread_qos_override_zone, Z_CALLERACCT, FALSE);
	zone_change(thread_qos_override_zone, Z_NOENCRYPT, TRUE);

	lck_grp_attr_setdefault(&thread_lck_grp_attr);
	lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr);
	lck_attr_setdefault(&thread_lck_attr);

	stack_init();

	thread_policy_init();

	/*
	 *	Initialize any machine-dependent
	 *	per-thread structures necessary.
	 */
	machine_thread_init();

	if (!PE_parse_boot_argn("cpumon_ustackshots_trigger_pct", &cpumon_ustackshots_trigger_pct,
		sizeof (cpumon_ustackshots_trigger_pct))) {
		cpumon_ustackshots_trigger_pct = CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT;
	}

	PE_parse_boot_argn("-qos-policy-allow", &allow_qos_policy_set, sizeof(allow_qos_policy_set));

	init_thread_ledgers();
}
boolean_t
thread_is_active(thread_t thread)
{
	return (thread->active);
}

void
thread_corpse_continue(void)
{
	thread_t thread = current_thread();

	thread_terminate_internal(thread);
	ml_set_interrupts_enabled(FALSE);
	ast_taken(AST_APC, TRUE);

	panic("thread_corpse_continue");
	/*NOTREACHED*/
}

static void
thread_terminate_continue(void)
{
	panic("thread_terminate_continue");
	/*NOTREACHED*/
}
/*
 *	thread_terminate_self:
 */
void
thread_terminate_self(void)
{
	thread_t		thread = current_thread();
	task_t			task;
	spl_t			s;
	int			threadcnt;

	pal_thread_terminate_self(thread);

	DTRACE_PROC(lwp__exit);

	thread_mtx_lock(thread);

	ipc_thread_disable(thread);

	thread_mtx_unlock(thread);

	s = splsched();
	thread_lock(thread);

	/*
	 *	Cancel priority depression, wait for concurrent expirations
	 *	on other processors.
	 */
	if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
		thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;

		/* If our priority was low because of a depressed yield, restore it in case we block below */
		thread_recompute_sched_pri(thread, FALSE);

		if (timer_call_cancel(&thread->depress_timer))
			thread->depress_timer_active--;
	}

	while (thread->depress_timer_active > 0) {
		thread_unlock(thread);
		splx(s);

		delay(1);

		s = splsched();
		thread_lock(thread);
	}

	thread_sched_call(thread, NULL);

	thread_unlock(thread);
	splx(s);

	thread_mtx_lock(thread);

	thread_policy_reset(thread);

	thread_mtx_unlock(thread);

	task = thread->task;
	uthread_cleanup(task, thread->uthread, task->bsd_info);
	threadcnt = hw_atomic_sub(&task->active_thread_count, 1);

	if (task->bsd_info && !task_is_exec_copy(task)) {
		/* trace out pid before we sign off */
		long	dbg_arg1 = 0;

		kdbg_trace_data(thread->task->bsd_info, &dbg_arg1);

		KERNEL_DEBUG_CONSTANT(TRACE_DATA_THREAD_TERMINATE_PID | DBG_FUNC_NONE,
				      dbg_arg1, 0, 0, 0, 0);
	}

	/*
	 * If we are the last thread to terminate and the task is
	 * associated with a BSD process, perform BSD process exit.
	 */
	if (threadcnt == 0 && task->bsd_info != NULL && !task_is_exec_copy(task)) {
		mach_exception_data_type_t subcode = 0;

		/* since we're the last thread in this process, trace out the command name too */
		long	dbg_arg1 = 0, dbg_arg2 = 0, dbg_arg3 = 0, dbg_arg4 = 0;

		kdbg_trace_string(thread->task->bsd_info, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);

		KERNEL_DEBUG_CONSTANT(TRACE_STRING_PROC_EXIT | DBG_FUNC_NONE,
				      dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);

		/* Get the exit reason before proc_exit */
		subcode = proc_encode_exit_exception_code(task->bsd_info);
		proc_exit(task->bsd_info);

		/*
		 * if there is crash info in task
		 * then do the deliver action since this is
		 * last thread for this task.
		 */
		if (task->corpse_info) {
			task_deliver_crash_notification(task, current_thread(), subcode);
		}
	}

	if (threadcnt == 0) {
		task_lock(task);
		if (task_is_a_corpse_fork(task)) {
			thread_wakeup((event_t)&task->active_thread_count);
		}
		task_unlock(task);
	}

	uthread_cred_free(thread->uthread);

	s = splsched();
	thread_lock(thread);

	/*
	 *	Cancel wait timer, and wait for
	 *	concurrent expirations.
	 */
	if (thread->wait_timer_is_set) {
		thread->wait_timer_is_set = FALSE;

		if (timer_call_cancel(&thread->wait_timer))
			thread->wait_timer_active--;
	}

	while (thread->wait_timer_active > 0) {
		thread_unlock(thread);
		splx(s);

		delay(1);

		s = splsched();
		thread_lock(thread);
	}

	/*
	 *	If there is a reserved stack, release it.
	 */
	if (thread->reserved_stack != 0) {
		stack_free_reserved(thread);
		thread->reserved_stack = 0;
	}

	/*
	 *	Mark thread as terminating, and block.
	 */
	thread->state |= TH_TERMINATE;
	thread_mark_wait_locked(thread, THREAD_UNINT);
	assert((thread->sched_flags & TH_SFLAG_PROMOTED) == 0);
	assert(thread->promotions == 0);
	assert(!(thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED));
	assert(thread->rwlock_count == 0);
	thread_unlock(thread);
	splx(s);

	thread_block((thread_continue_t)thread_terminate_continue);
	/*NOTREACHED*/
}
/* Drop a thread refcount that definitely isn't the last one. */
void
thread_deallocate_safe(thread_t thread)
{
	assert_thread_magic(thread);

	uint32_t old_refcount = hw_atomic_sub(&(thread)->ref_count, 1) + 1;

	if (__improbable(old_refcount <= 1))
		panic("bad thread refcount: %d", old_refcount);
}
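/*
 * Usage sketch (illustrative, not part of the original file): prefer
 * thread_deallocate_safe() over thread_deallocate() when the caller knows
 * its reference cannot be the last one, e.g. a short-lived extra ref:
 *
 *	thread_reference(thread);	// take a ref for a short operation
 *	...				// use the thread
 *	thread_deallocate_safe(thread);	// drop it; never the final release
 */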
void
thread_deallocate(
	thread_t			thread)
{
	task_t				task;

	if (thread == THREAD_NULL)
		return;

	assert_thread_magic(thread);
	assert(thread->ref_count > 0);

	if (__probable(hw_atomic_sub(&(thread)->ref_count, 1) > 0))
		return;

	if (!(thread->state & TH_TERMINATE2))
		panic("thread_deallocate: thread not properly terminated\n");

	assert(thread->runq == PROCESSOR_NULL);

	assert(thread->user_promotions == 0);

#if KPC
	kpc_thread_destroy(thread);
#endif

	ipc_thread_terminate(thread);

	proc_thread_qos_deallocate(thread);

	task = thread->task;

#ifdef MACH_BSD
	{
		void *ut = thread->uthread;

		thread->uthread = NULL;
		uthread_zone_free(ut);
	}
#endif /* MACH_BSD */

	if (thread->t_ledger)
		ledger_dereference(thread->t_ledger);
	if (thread->t_threadledger)
		ledger_dereference(thread->t_threadledger);

	if (IPC_VOUCHER_NULL != thread->ith_voucher)
		ipc_voucher_release(thread->ith_voucher);

	if (thread->thread_io_stats)
		kfree(thread->thread_io_stats, sizeof(struct io_stat_info));

	if (thread->kernel_stack != 0)
		stack_free(thread);

	lck_mtx_destroy(&thread->mutex, &thread_lck_grp);
	machine_thread_destroy(thread);

	task_deallocate(task);

#if MACH_ASSERT
	assert_thread_magic(thread);
	thread->thread_magic = 0;
#endif /* MACH_ASSERT */

	zfree(thread_zone, thread);
}
/*
 *	thread_exception_daemon:
 *
 *	Deliver EXC_RESOURCE exception
 */
static void
thread_exception_daemon(void)
{
	struct thread_exception_elt *elt;
	task_t task;
	thread_t thread;

	simple_lock(&thread_exception_lock);

	while ((elt = (struct thread_exception_elt *)dequeue_head(&thread_exception_queue)) != NULL) {
		simple_unlock(&thread_exception_lock);

		task = elt->exception_task;
		thread = elt->exception_thread;
		assert_thread_magic(thread);

		kfree(elt, sizeof(struct thread_exception_elt));

		/* wait for all the threads in the task to terminate */
		task_lock(task);
		task_wait_till_threads_terminate_locked(task);
		task_unlock(task);

		/* Consumes the task ref returned by task_generate_corpse_internal */
		task_deallocate(task);
		/* Consumes the thread ref returned by task_generate_corpse_internal */
		thread_deallocate(thread);

		/* Deliver the EXC_RESOURCE notification, also clears the corpse. */
		task_deliver_crash_notification(task, thread, 0);

		simple_lock(&thread_exception_lock);
	}

	assert_wait((event_t)&thread_exception_queue, THREAD_UNINT);
	simple_unlock(&thread_exception_lock);

	thread_block((thread_continue_t)thread_exception_daemon);
}
/*
 *	thread_exception_enqueue:
 *
 *	Enqueue a corpse port to be delivered an EXC_RESOURCE.
 */
void
thread_exception_enqueue(
	task_t		task,
	thread_t	thread)
{
	struct thread_exception_elt *elt = (struct thread_exception_elt *) kalloc(
			sizeof(struct thread_exception_elt));

	elt->exception_task = task;
	elt->exception_thread = thread;

	simple_lock(&thread_exception_lock);
	enqueue_tail(&thread_exception_queue, (queue_entry_t)elt);
	simple_unlock(&thread_exception_lock);

	thread_wakeup((event_t)&thread_exception_queue);
}
/*
 * thread_copy_resource_info
 *
 * Copy the resource info counters from source
 * thread to destination thread.
 */
void
thread_copy_resource_info(
	thread_t dst_thread,
	thread_t src_thread)
{
	dst_thread->thread_tag = src_thread->thread_tag;
	dst_thread->c_switch = src_thread->c_switch;
	dst_thread->p_switch = src_thread->p_switch;
	dst_thread->ps_switch = src_thread->ps_switch;
	dst_thread->precise_user_kernel_time = src_thread->precise_user_kernel_time;
	dst_thread->user_timer = src_thread->user_timer;
	dst_thread->user_timer_save = src_thread->user_timer_save;
	dst_thread->system_timer_save = src_thread->system_timer_save;
	dst_thread->syscalls_unix = src_thread->syscalls_unix;
	dst_thread->syscalls_mach = src_thread->syscalls_mach;
	ledger_rollup(dst_thread->t_threadledger, src_thread->t_threadledger);
	*dst_thread->thread_io_stats = *src_thread->thread_io_stats;
}
/*
 *	thread_terminate_daemon:
 *
 *	Perform final clean up for terminating threads.
 */
static void
thread_terminate_daemon(void)
{
	thread_t	self, thread;
	task_t		task;

	self = current_thread();
	self->options |= TH_OPT_SYSTEM_CRITICAL;

	(void)splsched();
	simple_lock(&thread_terminate_lock);

	while ((thread = qe_dequeue_head(&thread_terminate_queue, struct thread, runq_links)) != THREAD_NULL) {
		assert_thread_magic(thread);

		/*
		 * if marked for crash reporting, skip reaping.
		 * The corpse delivery thread will clear the bit and enqueue
		 * for reaping when done
		 */
		if (thread->inspection) {
			enqueue_tail(&crashed_threads_queue, &thread->runq_links);
			continue;
		}

		simple_unlock(&thread_terminate_lock);
		(void)spllo();

		task = thread->task;

		task_lock(task);
		task->total_user_time += timer_grab(&thread->user_timer);
		if (thread->precise_user_kernel_time) {
			task->total_system_time += timer_grab(&thread->system_timer);
		} else {
			task->total_user_time += timer_grab(&thread->system_timer);
		}

		task->c_switch += thread->c_switch;
		task->p_switch += thread->p_switch;
		task->ps_switch += thread->ps_switch;

		task->syscalls_unix += thread->syscalls_unix;
		task->syscalls_mach += thread->syscalls_mach;

		task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
		task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
		task->task_gpu_ns += ml_gpu_stat(thread);
		task->task_energy += ml_energy_stat(thread);

		thread_update_qos_cpu_time(thread);

		queue_remove(&task->threads, thread, thread_t, task_threads);
		task->thread_count--;

		/*
		 * If the task is being halted, and there is only one thread
		 * left in the task after this one, then wakeup that thread.
		 */
		if (task->thread_count == 1 && task->halting)
			thread_wakeup((event_t)&task->halting);

		task_unlock(task);

		lck_mtx_lock(&tasks_threads_lock);
		queue_remove(&threads, thread, thread_t, threads);
		threads_count--;
		lck_mtx_unlock(&tasks_threads_lock);

		thread_deallocate(thread);

		(void)splsched();
		simple_lock(&thread_terminate_lock);
	}

	assert_wait((event_t)&thread_terminate_queue, THREAD_UNINT);
	simple_unlock(&thread_terminate_lock);
	/* splsched */

	self->options &= ~TH_OPT_SYSTEM_CRITICAL;
	thread_block((thread_continue_t)thread_terminate_daemon);
	/*NOTREACHED*/
}
/*
 *	thread_terminate_enqueue:
 *
 *	Enqueue a terminating thread for final disposition.
 *
 *	Called at splsched.
 */
void
thread_terminate_enqueue(
	thread_t		thread)
{
	KERNEL_DEBUG_CONSTANT(TRACE_DATA_THREAD_TERMINATE | DBG_FUNC_NONE, thread->thread_id, 0, 0, 0, 0);

	simple_lock(&thread_terminate_lock);
	enqueue_tail(&thread_terminate_queue, &thread->runq_links);
	simple_unlock(&thread_terminate_lock);

	thread_wakeup((event_t)&thread_terminate_queue);
}
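/*
 * Sketch of the producer/consumer handoff used by the queues in this file
 * (illustrative, not from the original source).  Producers run at splsched,
 * link the thread onto a queue under a simple lock, and wake the daemon;
 * the daemon dequeues under the same lock and drops it while it works:
 *
 *	simple_lock(&q_lock);
 *	enqueue_tail(&q, &thread->runq_links);
 *	simple_unlock(&q_lock);
 *	thread_wakeup((event_t)&q);	// daemon did assert_wait() on &q
 */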
/*
 *	thread_terminate_crashed_threads:
 *	Walk the list of crashed threads and move any that are no longer
 *	being inspected back onto the terminate queue.
 */
void
thread_terminate_crashed_threads()
{
	thread_t th_remove;
	boolean_t should_wake_terminate_queue = FALSE;

	simple_lock(&thread_terminate_lock);
	/*
	 * Loop through the crashed threads queue
	 * to requeue any threads that are not being inspected anymore.
	 */
	qe_foreach_element_safe(th_remove, &crashed_threads_queue, runq_links) {
		/* make sure current_thread is never in crashed queue */
		assert(th_remove != current_thread());

		if (th_remove->inspection == FALSE) {
			re_queue_tail(&thread_terminate_queue, &th_remove->runq_links);
			should_wake_terminate_queue = TRUE;
		}
	}

	simple_unlock(&thread_terminate_lock);
	if (should_wake_terminate_queue == TRUE) {
		thread_wakeup((event_t)&thread_terminate_queue);
	}
}
/*
 *	thread_stack_daemon:
 *
 *	Perform stack allocation as required due to
 *	invoke failures.
 */
static void
thread_stack_daemon(void)
{
	thread_t		thread;
	spl_t			s;

	s = splsched();
	simple_lock(&thread_stack_lock);

	while ((thread = qe_dequeue_head(&thread_stack_queue, struct thread, runq_links)) != THREAD_NULL) {
		assert_thread_magic(thread);

		simple_unlock(&thread_stack_lock);
		splx(s);

		/* allocate stack with interrupts enabled so that we can call into VM */
		stack_alloc(thread);

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_WAIT) | DBG_FUNC_END, thread_tid(thread), 0, 0, 0, 0);

		s = splsched();
		thread_lock(thread);
		thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
		thread_unlock(thread);

		simple_lock(&thread_stack_lock);
	}

	assert_wait((event_t)&thread_stack_queue, THREAD_UNINT);
	simple_unlock(&thread_stack_lock);
	splx(s);

	thread_block((thread_continue_t)thread_stack_daemon);
}
/*
 *	thread_stack_enqueue:
 *
 *	Enqueue a thread for stack allocation.
 *
 *	Called at splsched.
 */
void
thread_stack_enqueue(
	thread_t		thread)
{
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_WAIT) | DBG_FUNC_START, thread_tid(thread), 0, 0, 0, 0);
	assert_thread_magic(thread);

	simple_lock(&thread_stack_lock);
	enqueue_tail(&thread_stack_queue, &thread->runq_links);
	simple_unlock(&thread_stack_lock);

	thread_wakeup((event_t)&thread_stack_queue);
}
void
thread_daemon_init(void)
{
	kern_return_t	result;
	thread_t	thread = NULL;

	simple_lock_init(&thread_terminate_lock, 0);
	queue_init(&thread_terminate_queue);
	queue_init(&crashed_threads_queue);

	result = kernel_thread_start_priority((thread_continue_t)thread_terminate_daemon, NULL, MINPRI_KERNEL, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_daemon_init: thread_terminate_daemon");

	thread_deallocate(thread);

	simple_lock_init(&thread_stack_lock, 0);
	queue_init(&thread_stack_queue);

	result = kernel_thread_start_priority((thread_continue_t)thread_stack_daemon, NULL, BASEPRI_PREEMPT, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_daemon_init: thread_stack_daemon");

	thread_deallocate(thread);

	simple_lock_init(&thread_exception_lock, 0);
	queue_init(&thread_exception_queue);

	result = kernel_thread_start_priority((thread_continue_t)thread_exception_daemon, NULL, MINPRI_KERNEL, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_daemon_init: thread_exception_daemon");

	thread_deallocate(thread);
}
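/*
 * Note (illustrative, not from the original source): each daemon started
 * above is self-sufficient once running.  The start routine hands back a
 * reference on the new kernel thread, and since the daemons loop forever,
 * thread_daemon_init() drops that reference immediately:
 *
 *	kernel_thread_start_priority(continuation, NULL, pri, &thread);
 *	thread_deallocate(thread);	// daemon keeps running regardless
 */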
#define TH_OPTION_NONE		0x00
#define TH_OPTION_NOCRED	0x01
#define TH_OPTION_NOSUSP	0x02

/*
 * Create a new thread.
 * Doesn't start the thread running.
 *
 * Task and tasks_threads_lock are returned locked on success.
 */
static kern_return_t
thread_create_internal(
	task_t				parent_task,
	integer_t			priority,
	thread_continue_t		continuation,
	int				options,
	thread_t			*out_thread)
{
	thread_t			new_thread;
	static thread_t			first_thread;

	/*
	 *	Allocate a thread and initialize static fields
	 */
	if (first_thread == THREAD_NULL)
		new_thread = first_thread = current_thread();
	else
		new_thread = (thread_t)zalloc(thread_zone);
	if (new_thread == THREAD_NULL)
		return (KERN_RESOURCE_SHORTAGE);

	if (new_thread != first_thread)
		*new_thread = thread_template;

#ifdef MACH_BSD
	new_thread->uthread = uthread_alloc(parent_task, new_thread, (options & TH_OPTION_NOCRED) != 0);
	if (new_thread->uthread == NULL) {
#if MACH_ASSERT
		new_thread->thread_magic = 0;
#endif /* MACH_ASSERT */

		zfree(thread_zone, new_thread);
		return (KERN_RESOURCE_SHORTAGE);
	}
#endif /* MACH_BSD */

	if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
#ifdef MACH_BSD
		void *ut = new_thread->uthread;

		new_thread->uthread = NULL;
		/* cred free may not be necessary */
		uthread_cleanup(parent_task, ut, parent_task->bsd_info);
		uthread_cred_free(ut);
		uthread_zone_free(ut);
#endif /* MACH_BSD */

#if MACH_ASSERT
		new_thread->thread_magic = 0;
#endif /* MACH_ASSERT */

		zfree(thread_zone, new_thread);
		return (KERN_FAILURE);
	}

	new_thread->task = parent_task;

	thread_lock_init(new_thread);
	wake_lock_init(new_thread);

	lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr);

	ipc_thread_init(new_thread);

	new_thread->continuation = continuation;

	/* Allocate I/O Statistics structure */
	new_thread->thread_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
	assert(new_thread->thread_io_stats != NULL);
	bzero(new_thread->thread_io_stats, sizeof(struct io_stat_info));

#if CONFIG_IOSCHED
	/* Clear out the I/O Scheduling info for AppleFSCompression */
	new_thread->decmp_upl = NULL;
#endif /* CONFIG_IOSCHED */

	lck_mtx_lock(&tasks_threads_lock);
	task_lock(parent_task);

	/*
	 * Fail thread creation if parent task is being torn down or has too many threads
	 * If the caller asked for TH_OPTION_NOSUSP, also fail if the parent task is suspended
	 */
	if (parent_task->active == 0 || parent_task->halting ||
	    (parent_task->suspend_count > 0 && (options & TH_OPTION_NOSUSP) != 0) ||
	    (parent_task->thread_count >= task_threadmax && parent_task != kernel_task)) {
		task_unlock(parent_task);
		lck_mtx_unlock(&tasks_threads_lock);

#ifdef MACH_BSD
		{
			void *ut = new_thread->uthread;

			new_thread->uthread = NULL;
			uthread_cleanup(parent_task, ut, parent_task->bsd_info);
			/* cred free may not be necessary */
			uthread_cred_free(ut);
			uthread_zone_free(ut);
		}
#endif /* MACH_BSD */
		ipc_thread_disable(new_thread);
		ipc_thread_terminate(new_thread);
		kfree(new_thread->thread_io_stats, sizeof(struct io_stat_info));
		lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp);
		machine_thread_destroy(new_thread);
		zfree(thread_zone, new_thread);
		return (KERN_FAILURE);
	}

	/* New threads inherit any default state on the task */
	machine_thread_inherit_taskwide(new_thread, parent_task);

	task_reference_internal(parent_task);

	if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
		/*
		 * This task has a per-thread CPU limit; make sure this new thread
		 * gets its limit set too, before it gets out of the kernel.
		 */
		set_astledger(new_thread);
	}

	/* Instantiate a thread ledger. Do not fail thread creation if ledger creation fails. */
	if ((new_thread->t_threadledger = ledger_instantiate(thread_ledger_template,
				LEDGER_CREATE_INACTIVE_ENTRIES)) != LEDGER_NULL) {
		ledger_entry_setactive(new_thread->t_threadledger, thread_ledgers.cpu_time);
	}

	new_thread->t_bankledger = LEDGER_NULL;
	new_thread->t_deduct_bank_ledger_time = 0;

	new_thread->t_ledger = new_thread->task->ledger;
	if (new_thread->t_ledger)
		ledger_reference(new_thread->t_ledger);

#if defined(CONFIG_SCHED_MULTIQ)
	/* Cache the task's sched_group */
	new_thread->sched_group = parent_task->sched_group;
#endif /* defined(CONFIG_SCHED_MULTIQ) */

	/* Cache the task's map */
	new_thread->map = parent_task->map;

	timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
	timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);

#if KPC
	kpc_thread_create(new_thread);
#endif

	/* Set the thread's scheduling parameters */
	new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
	new_thread->max_priority = parent_task->max_priority;
	new_thread->task_priority = parent_task->priority;

	int new_priority = (priority < 0) ? parent_task->priority : priority;
	if (new_priority > new_thread->max_priority)
		new_priority = new_thread->max_priority;

	new_thread->importance = new_priority - new_thread->task_priority;

	sched_set_thread_base_priority(new_thread, new_priority);

#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	new_thread->sched_stamp = sched_tick;
	new_thread->pri_shift = sched_pri_shifts[new_thread->th_sched_bucket];
#endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) */

	thread_policy_create(new_thread);

	/* Chain the thread onto the task's list */
	queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
	parent_task->thread_count++;

	/* So terminating threads don't need to take the task lock to decrement */
	hw_atomic_add(&parent_task->active_thread_count, 1);

	/* Protected by the tasks_threads_lock */
	new_thread->thread_id = ++thread_unique_id;

	queue_enter(&threads, new_thread, thread_t, threads);
	threads_count++;

	new_thread->active = TRUE;
	if (task_is_a_corpse_fork(parent_task)) {
		/* Set the inspection bit if the task is a corpse fork */
		new_thread->inspection = TRUE;
	} else {
		new_thread->inspection = FALSE;
	}
	new_thread->corpse_dup = FALSE;
	*out_thread = new_thread;

	{
		long	dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;

		kdbg_trace_data(parent_task->bsd_info, &dbg_arg2);

		/*
		 * Starting with 26604425, exec'ing creates a new task/thread.
		 *
		 * NEWTHREAD in the current process has two possible meanings:
		 *
		 * 1) Create a new thread for this process.
		 * 2) Create a new thread for the future process this will become in an exec.
		 *
		 * To disambiguate these, arg3 will be set to TRUE for case #2.
		 *
		 * The value we need to find (TPF_EXEC_COPY) is stable in the case of a
		 * task exec'ing. The read of t_procflags does not take the proc_lock.
		 */
		dbg_arg3 = (task_is_exec_copy(parent_task)) ? TRUE : 0;

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			TRACE_DATA_NEWTHREAD | DBG_FUNC_NONE,
			(vm_address_t)(uintptr_t)thread_tid(new_thread), dbg_arg2, dbg_arg3, 0, 0);

		kdbg_trace_string(parent_task->bsd_info,
			&dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			TRACE_STRING_NEWTHREAD | DBG_FUNC_NONE,
			dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
	}

	DTRACE_PROC1(lwp__create, thread_t, *out_thread);

	return (KERN_SUCCESS);
}
static kern_return_t
thread_create_internal2(
	task_t				task,
	thread_t			*new_thread,
	boolean_t			from_user,
	thread_continue_t		continuation)
{
	kern_return_t		result;
	thread_t		thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, continuation, TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	thread->user_stop_count = 1;
	thread_hold(thread);
	if (task->suspend_count > 0)
		thread_hold(thread);

	if (from_user)
		extmod_statistics_incr_thread_create(task);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}
/* No prototype, since task_server.h has the _from_user version if KERNEL_SERVER */
kern_return_t
thread_create(
	task_t				task,
	thread_t			*new_thread);

kern_return_t
thread_create(
	task_t				task,
	thread_t			*new_thread)
{
	return thread_create_internal2(task, new_thread, FALSE, (thread_continue_t)thread_bootstrap_return);
}

kern_return_t
thread_create_from_user(
	task_t				task,
	thread_t			*new_thread)
{
	return thread_create_internal2(task, new_thread, TRUE, (thread_continue_t)thread_bootstrap_return);
}

kern_return_t
thread_create_with_continuation(
	task_t				task,
	thread_t			*new_thread,
	thread_continue_t		continuation)
{
	return thread_create_internal2(task, new_thread, FALSE, continuation);
}
/*
 * Create a thread that is already started, but is waiting on an event
 */
static kern_return_t
thread_create_waiting_internal(
	task_t			task,
	thread_continue_t	continuation,
	event_t			event,
	int			options,
	thread_t		*new_thread)
{
	kern_return_t		result;
	thread_t		thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, continuation, options, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	/* note no user_stop_count or thread_hold here */

	if (task->suspend_count > 0)
		thread_hold(thread);

	thread_mtx_lock(thread);
	thread_start_in_assert_wait(thread, event, THREAD_INTERRUPTIBLE);
	thread_mtx_unlock(thread);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}

kern_return_t
thread_create_waiting(
	task_t			task,
	thread_continue_t	continuation,
	event_t			event,
	thread_t		*new_thread)
{
	return thread_create_waiting_internal(task, continuation, event,
					      TH_OPTION_NONE, new_thread);
}
static kern_return_t
thread_create_running_internal2(
	task_t				task,
	int				flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t		new_state_count,
	thread_t			*new_thread,
	boolean_t			from_user)
{
	kern_return_t		result;
	thread_t		thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	if (task->suspend_count > 0)
		thread_hold(thread);

	result = machine_thread_set_state(thread, flavor, new_state, new_state_count);
	if (result != KERN_SUCCESS) {
		task_unlock(task);
		lck_mtx_unlock(&tasks_threads_lock);

		thread_terminate(thread);
		thread_deallocate(thread);
		return (result);
	}

	thread_mtx_lock(thread);
	thread_start(thread);
	thread_mtx_unlock(thread);

	if (from_user)
		extmod_statistics_incr_thread_create(task);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}

/* Prototype, see justification above */
kern_return_t
thread_create_running(
	task_t				task,
	int				flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t		new_state_count,
	thread_t			*new_thread);

kern_return_t
thread_create_running(
	task_t				task,
	int				flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t		new_state_count,
	thread_t			*new_thread)
{
	return thread_create_running_internal2(
			task, flavor, new_state, new_state_count,
			new_thread, FALSE);
}

kern_return_t
thread_create_running_from_user(
	task_t				task,
	int				flavor,
	thread_state_t			new_state,
	mach_msg_type_number_t		new_state_count,
	thread_t			*new_thread)
{
	return thread_create_running_internal2(
			task, flavor, new_state, new_state_count,
			new_thread, TRUE);
}
kern_return_t
thread_create_workq(
	task_t				task,
	thread_continue_t		thread_return,
	thread_t			*new_thread)
{
	kern_return_t		result;
	thread_t		thread;

	if (task == TASK_NULL || task == kernel_task)
		return (KERN_INVALID_ARGUMENT);

	result = thread_create_internal(task, -1, thread_return, TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	thread->user_stop_count = 1;
	thread_hold(thread);
	if (task->suspend_count > 0)
		thread_hold(thread);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	*new_thread = thread;

	return (KERN_SUCCESS);
}

kern_return_t
thread_create_workq_waiting(
	task_t			task,
	thread_continue_t	continuation,
	event_t			event,
	thread_t		*new_thread)
{
	return thread_create_waiting_internal(task, continuation, event,
					      TH_OPTION_NOCRED | TH_OPTION_NOSUSP,
					      new_thread);
}
/*
 *	kernel_thread_create:
 *
 *	Create a thread in the kernel task
 *	to execute in kernel context.
 */
kern_return_t
kernel_thread_create(
	thread_continue_t	continuation,
	void			*parameter,
	integer_t		priority,
	thread_t		*new_thread)
{
	kern_return_t		result;
	thread_t		thread;
	task_t			task = kernel_task;

	result = thread_create_internal(task, priority, continuation, TH_OPTION_NONE, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	task_unlock(task);
	lck_mtx_unlock(&tasks_threads_lock);

	stack_alloc(thread);
	assert(thread->kernel_stack != 0);
	thread->reserved_stack = thread->kernel_stack;

	thread->parameter = parameter;

	if (debug_task & 1)
		kprintf("kernel_thread_create: thread = %p continuation = %p\n", thread, continuation);
	*new_thread = thread;

	return (result);
}

kern_return_t
kernel_thread_start_priority(
	thread_continue_t	continuation,
	void			*parameter,
	integer_t		priority,
	thread_t		*new_thread)
{
	kern_return_t	result;
	thread_t	thread;

	result = kernel_thread_create(continuation, parameter, priority, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	*new_thread = thread;

	thread_mtx_lock(thread);
	thread_start(thread);
	thread_mtx_unlock(thread);

	return (result);
}

kern_return_t
kernel_thread_start(
	thread_continue_t	continuation,
	void			*parameter,
	thread_t		*new_thread)
{
	return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
}
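/*
 * Usage sketch for kernel_thread_start() (illustrative, not part of the
 * original file; my_worker is a hypothetical continuation):
 *
 *	static void
 *	my_worker(void *parameter, wait_result_t wresult)
 *	{
 *		// runs on its own kernel stack; never returns to the caller
 *	}
 *
 *	thread_t th;
 *	if (kernel_thread_start(my_worker, NULL, &th) == KERN_SUCCESS)
 *		thread_deallocate(th);	// drop the ref the start routine returned
 */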
/* Separated into helper function so it can be used by THREAD_BASIC_INFO and THREAD_EXTENDED_INFO */
/* it is assumed that the thread is locked by the caller */
static void
retrieve_thread_basic_info(thread_t thread, thread_basic_info_t basic_info)
{
	int	state, flags;

	/* fill in info */

	thread_read_times(thread, &basic_info->user_time,
			  &basic_info->system_time);

	/*
	 *	Update lazy-evaluated scheduler info because someone wants it.
	 */
	if (SCHED(can_update_priority)(thread))
		SCHED(update_priority)(thread);

	basic_info->sleep_time = 0;

	/*
	 *	To calculate cpu_usage, first correct for timer rate,
	 *	then for 5/8 ageing.  The correction factor [3/5] is
	 *	(1/(5/8) - 1).
	 */
	basic_info->cpu_usage = 0;
#if defined(CONFIG_SCHED_TIMESHARE_CORE)
	if (sched_tick_interval) {
		basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
					* TH_USAGE_SCALE) / sched_tick_interval);
		basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
	}
#endif

	if (basic_info->cpu_usage > TH_USAGE_SCALE)
		basic_info->cpu_usage = TH_USAGE_SCALE;

	basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
				POLICY_TIMESHARE: POLICY_RR);

	flags = 0;
	if (thread->options & TH_OPT_IDLE_THREAD)
		flags |= TH_FLAGS_IDLE;

	if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
		flags |= TH_FLAGS_GLOBAL_FORCED_IDLE;
	}

	if (!thread->kernel_stack)
		flags |= TH_FLAGS_SWAPPED;

	state = 0;
	if (thread->state & TH_TERMINATE)
		state = TH_STATE_HALTED;
	else
	if (thread->state & TH_RUN)
		state = TH_STATE_RUNNING;
	else
	if (thread->state & TH_UNINT)
		state = TH_STATE_UNINTERRUPTIBLE;
	else
	if (thread->state & TH_SUSP)
		state = TH_STATE_STOPPED;
	else
	if (thread->state & TH_WAIT)
		state = TH_STATE_WAITING;

	basic_info->run_state = state;
	basic_info->flags = flags;

	basic_info->suspend_count = thread->user_stop_count;

	return;
}
kern_return_t
thread_info_internal(
	thread_t		thread,
	thread_flavor_t		flavor,
	thread_info_t		thread_info_out,	/* ptr to OUT array */
	mach_msg_type_number_t	*thread_info_count)	/*IN/OUT*/
{
	spl_t	s;

	if (thread == THREAD_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (flavor == THREAD_BASIC_INFO) {

		if (*thread_info_count < THREAD_BASIC_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		s = splsched();
		thread_lock(thread);

		retrieve_thread_basic_info(thread, (thread_basic_info_t) thread_info_out);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = THREAD_BASIC_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_IDENTIFIER_INFO) {
		thread_identifier_info_t	identifier_info;

		if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		identifier_info = (thread_identifier_info_t) thread_info_out;

		s = splsched();
		thread_lock(thread);

		identifier_info->thread_id = thread->thread_id;
		identifier_info->thread_handle = thread->machine.cthread_self;
		identifier_info->dispatch_qaddr = thread_dispatchqaddr(thread);

		thread_unlock(thread);
		splx(s);
		return KERN_SUCCESS;
	}
	else
	if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
		policy_timeshare_info_t		ts_info;

		if (*thread_info_count < POLICY_TIMESHARE_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		ts_info = (policy_timeshare_info_t)thread_info_out;

		s = splsched();
		thread_lock(thread);

		if (thread->sched_mode != TH_MODE_TIMESHARE) {
			thread_unlock(thread);
			splx(s);
			return (KERN_INVALID_POLICY);
		}

		ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
		if (ts_info->depressed) {
			ts_info->base_priority = DEPRESSPRI;
			ts_info->depress_priority = thread->base_pri;
		}
		else {
			ts_info->base_priority = thread->base_pri;
			ts_info->depress_priority = -1;
		}

		ts_info->cur_priority = thread->sched_pri;
		ts_info->max_priority = thread->max_priority;

		thread_unlock(thread);
		splx(s);

		*thread_info_count = POLICY_TIMESHARE_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_SCHED_FIFO_INFO) {
		if (*thread_info_count < POLICY_FIFO_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		return (KERN_INVALID_POLICY);
	}
	else
	if (flavor == THREAD_SCHED_RR_INFO) {
		policy_rr_info_t	rr_info;
		uint32_t		quantum_time;
		uint64_t		quantum_ns;

		if (*thread_info_count < POLICY_RR_INFO_COUNT)
			return (KERN_INVALID_ARGUMENT);

		rr_info = (policy_rr_info_t) thread_info_out;

		s = splsched();
		thread_lock(thread);

		if (thread->sched_mode == TH_MODE_TIMESHARE) {
			thread_unlock(thread);
			splx(s);

			return (KERN_INVALID_POLICY);
		}

		rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
		if (rr_info->depressed) {
			rr_info->base_priority = DEPRESSPRI;
			rr_info->depress_priority = thread->base_pri;
		}
		else {
			rr_info->base_priority = thread->base_pri;
			rr_info->depress_priority = -1;
		}

		quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
		absolutetime_to_nanoseconds(quantum_time, &quantum_ns);

		rr_info->max_priority = thread->max_priority;
		rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = POLICY_RR_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_EXTENDED_INFO) {
		thread_basic_info_data_t	basic_info;
		thread_extended_info_t		extended_info = (thread_extended_info_t) thread_info_out;

		if (*thread_info_count < THREAD_EXTENDED_INFO_COUNT) {
			return (KERN_INVALID_ARGUMENT);
		}

		s = splsched();
		thread_lock(thread);

		/* NOTE: This mimics fill_taskthreadinfo(), which is the function used by proc_pidinfo() for
		 * the PROC_PIDTHREADINFO flavor (which can't be used on corpses)
		 */
		retrieve_thread_basic_info(thread, &basic_info);
		extended_info->pth_user_time = ((basic_info.user_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.user_time.microseconds * (integer_t)NSEC_PER_USEC));
		extended_info->pth_system_time = ((basic_info.system_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.system_time.microseconds * (integer_t)NSEC_PER_USEC));

		extended_info->pth_cpu_usage = basic_info.cpu_usage;
		extended_info->pth_policy = basic_info.policy;
		extended_info->pth_run_state = basic_info.run_state;
		extended_info->pth_flags = basic_info.flags;
		extended_info->pth_sleep_time = basic_info.sleep_time;
		extended_info->pth_curpri = thread->sched_pri;
		extended_info->pth_priority = thread->base_pri;
		extended_info->pth_maxpriority = thread->max_priority;

		bsd_getthreadname(thread->uthread, extended_info->pth_name);

		thread_unlock(thread);
		splx(s);

		*thread_info_count = THREAD_EXTENDED_INFO_COUNT;

		return (KERN_SUCCESS);
	}
	else
	if (flavor == THREAD_DEBUG_INFO_INTERNAL) {
#if DEVELOPMENT || DEBUG
		thread_debug_info_internal_t dbg_info;
		if (*thread_info_count < THREAD_DEBUG_INFO_INTERNAL_COUNT)
			return (KERN_NOT_SUPPORTED);

		if (thread_info_out == NULL)
			return (KERN_INVALID_ARGUMENT);

		dbg_info = (thread_debug_info_internal_t) thread_info_out;
		dbg_info->page_creation_count = thread->t_page_creation_count;

		*thread_info_count = THREAD_DEBUG_INFO_INTERNAL_COUNT;
		return (KERN_SUCCESS);
#endif /* DEVELOPMENT || DEBUG */
		return (KERN_NOT_SUPPORTED);
	}

	return (KERN_INVALID_ARGUMENT);
}
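/*
 * Caller-side sketch for thread_info_internal() via the thread_info() MIG
 * interface (illustrative, not part of the original file):
 *
 *	thread_basic_info_data_t info;
 *	mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
 *
 *	kern_return_t kr = thread_info(thread_port, THREAD_BASIC_INFO,
 *				       (thread_info_t)&info, &count);
 *	// on success, info.cpu_usage is scaled so that TH_USAGE_SCALE
 *	// represents 100% of one CPU
 */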
void
thread_read_times(
	thread_t	thread,
	time_value_t	*user_time,
	time_value_t	*system_time)
{
	clock_sec_t	secs;
	clock_usec_t	usecs;
	uint64_t	tval_user, tval_system;

	tval_user = timer_grab(&thread->user_timer);
	tval_system = timer_grab(&thread->system_timer);

	if (thread->precise_user_kernel_time) {
		absolutetime_to_microtime(tval_user, &secs, &usecs);
		user_time->seconds = (typeof(user_time->seconds))secs;
		user_time->microseconds = usecs;

		absolutetime_to_microtime(tval_system, &secs, &usecs);
		system_time->seconds = (typeof(system_time->seconds))secs;
		system_time->microseconds = usecs;
	} else {
		/* system_timer may represent either sys or user */
		tval_user += tval_system;
		absolutetime_to_microtime(tval_user, &secs, &usecs);
		user_time->seconds = (typeof(user_time->seconds))secs;
		user_time->microseconds = usecs;

		system_time->seconds = 0;
		system_time->microseconds = 0;
	}
}
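/*
 * Worked example (illustrative, not from the original source): collapsing
 * the two time_value_t results into one microsecond total, as a caller of
 * thread_read_times() might:
 *
 *	time_value_t user, sys;
 *	thread_read_times(thread, &user, &sys);
 *	uint64_t total_us = (uint64_t)user.seconds * USEC_PER_SEC
 *			  + user.microseconds
 *			  + (uint64_t)sys.seconds * USEC_PER_SEC
 *			  + sys.microseconds;
 */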
uint64_t thread_get_runtime_self(void)
{
	boolean_t interrupt_state;
	uint64_t runtime;
	thread_t thread = NULL;
	processor_t processor = NULL;

	thread = current_thread();

	/* Not interrupt safe, as the scheduler may otherwise update timer values underneath us */
	interrupt_state = ml_set_interrupts_enabled(FALSE);
	processor = current_processor();
	timer_switch(PROCESSOR_DATA(processor, thread_timer), mach_absolute_time(), PROCESSOR_DATA(processor, thread_timer));
	runtime = (timer_grab(&thread->user_timer) + timer_grab(&thread->system_timer));
	ml_set_interrupts_enabled(interrupt_state);

	return runtime;
}
kern_return_t
thread_assign(
	__unused thread_t		thread,
	__unused processor_set_t	new_pset)
{
	return (KERN_FAILURE);
}

/*
 *	thread_assign_default:
 *
 *	Special version of thread_assign for assigning threads to default
 *	processor set.
 */
kern_return_t
thread_assign_default(
	thread_t		thread)
{
	return (thread_assign(thread, &pset0));
}

/*
 *	thread_get_assignment
 *
 *	Return current assignment for this thread.
 */
kern_return_t
thread_get_assignment(
	thread_t		thread,
	processor_set_t		*pset)
{
	if (thread == NULL)
		return (KERN_INVALID_ARGUMENT);

	*pset = &pset0;

	return (KERN_SUCCESS);
}
/*
 *	thread_wire_internal:
 *
 *	Specify that the target thread must always be able
 *	to run and to allocate memory.
 */
kern_return_t
thread_wire_internal(
	host_priv_t	host_priv,
	thread_t	thread,
	boolean_t	wired,
	boolean_t	*prev_state)
{
	if (host_priv == NULL || thread != current_thread())
		return (KERN_INVALID_ARGUMENT);

	assert(host_priv == &realhost);

	if (prev_state)
		*prev_state = (thread->options & TH_OPT_VMPRIV) != 0;

	if (wired) {
		if (!(thread->options & TH_OPT_VMPRIV))
			vm_page_free_reserve(1);	/* XXX */
		thread->options |= TH_OPT_VMPRIV;
	}
	else {
		if (thread->options & TH_OPT_VMPRIV)
			vm_page_free_reserve(-1);	/* XXX */
		thread->options &= ~TH_OPT_VMPRIV;
	}

	return (KERN_SUCCESS);
}

/*
 *	thread_wire:
 *
 *	User-api wrapper for thread_wire_internal()
 */
kern_return_t
thread_wire(
	host_priv_t	host_priv,
	thread_t	thread,
	boolean_t	wired)
{
	return (thread_wire_internal(host_priv, thread, wired, NULL));
}
boolean_t
is_vm_privileged(void)
{
	return current_thread()->options & TH_OPT_VMPRIV ? TRUE : FALSE;
}

boolean_t
set_vm_privilege(boolean_t privileged)
{
	boolean_t was_vmpriv;

	if (current_thread()->options & TH_OPT_VMPRIV)
		was_vmpriv = TRUE;
	else
		was_vmpriv = FALSE;

	if (privileged != FALSE)
		current_thread()->options |= TH_OPT_VMPRIV;
	else
		current_thread()->options &= ~TH_OPT_VMPRIV;

	return (was_vmpriv);
}
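/*
 * Usage sketch (illustrative, not part of the original file): callers that
 * need temporary VM privilege save and restore the previous state so that
 * nesting works:
 *
 *	boolean_t was_vmpriv = set_vm_privilege(TRUE);
 *	...	// allocate from the reserved pool, etc.
 *	set_vm_privilege(was_vmpriv);
 */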
void
set_thread_rwlock_boost(void)
{
	current_thread()->rwlock_count++;
}

void
clear_thread_rwlock_boost(void)
{
	thread_t thread = current_thread();

	if ((thread->rwlock_count-- == 1) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {

		lck_rw_clear_promotion(thread);
	}
}
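/*
 * Pairing sketch (illustrative, not from the original source): the counter
 * brackets a read-write lock hold so that a priority promotion taken while
 * holding the lock is undone when the last boosted lock is released:
 *
 *	set_thread_rwlock_boost();
 *	lck_rw_lock_shared(lck);
 *	...
 *	lck_rw_unlock_shared(lck);
 *	clear_thread_rwlock_boost();	// may call lck_rw_clear_promotion()
 */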
/*
 * XXX assuming current thread only, for now...
 */
void
thread_guard_violation(thread_t thread, unsigned type)
{
	assert(thread == current_thread());

	spl_t s = splsched();
	/*
	 * Use the saved state area of the thread structure
	 * to store all info required to handle the AST when
	 * returning to userspace
	 */
	thread->guard_exc_info.type = type;
	thread_ast_set(thread, AST_GUARD);
	ast_propagate(thread->ast);
	splx(s);
}

/*
 *	guard_ast:
 *
 *	Handle AST_GUARD for a thread. This routine looks at the
 *	state saved in the thread structure to determine the cause
 *	of this exception. Based on this value, it invokes the
 *	appropriate routine which determines other exception related
 *	info and raises the exception.
 */
void
guard_ast(thread_t thread)
{
	if (thread->guard_exc_info.type == GUARD_TYPE_MACH_PORT)
		mach_port_guard_ast(thread);
	else
		fd_guard_ast(thread);
}
static void
thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1)
{
	if (warning == LEDGER_WARNING_ROSE_ABOVE) {
#if CONFIG_TELEMETRY
		/*
		 * This thread is in danger of violating the CPU usage monitor.  Enable telemetry
		 * on the entire task so there are micro-stackshots available if and when
		 * EXC_RESOURCE is triggered.  We could have chosen to enable micro-stackshots
		 * for this thread only; but now that this task is suspect, knowing what all of
		 * its threads are up to will be useful.
		 */
		telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1);
#endif
		return;
	}

#if CONFIG_TELEMETRY
	/*
	 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
	 * exceeded the limit, turn telemetry off for the task.
	 */
	telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0);
#endif

	if (warning == 0) {
		SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU();
	}
}
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void)
{
	int          pid               = 0;
	task_t       task              = current_task();
	thread_t     thread            = current_thread();
	uint64_t     tid               = thread->thread_id;
	const char   *procname         = "unknown";
	time_value_t thread_total_time = {0, 0};
	time_value_t thread_system_time;
	time_value_t thread_user_time;
	int          action;
	uint8_t      percentage;
	uint32_t     usage_percent = 0;
	uint32_t     interval_sec;
	uint64_t     interval_ns;
	uint64_t     balance_ns;
	boolean_t    fatal = FALSE;
	boolean_t    send_exc_resource = TRUE; /* in addition to RESOURCE_NOTIFY */
	kern_return_t kr;

#ifdef EXC_RESOURCE_MONITORS
	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
#endif /* EXC_RESOURCE_MONITORS */
	struct ledger_entry_info lei;

	assert(thread->t_threadledger != LEDGER_NULL);

	/*
	 * Extract the fatal bit and suspend the monitor (which clears the bit).
	 */
	task_lock(task);
	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
		fatal = TRUE;
		send_exc_resource = TRUE;
	}
	/* Only one thread can be here at a time.  Whichever makes it through
	   first will successfully suspend the monitor and proceed to send the
	   notification.  Other threads will get an error trying to suspend the
	   monitor and give up on sending the notification.  In the first release,
	   the monitor won't be resumed for a number of seconds, but we may
	   eventually need to handle low-latency resume.
	*/
	kr = task_suspend_cpumon(task);
	task_unlock(task);
	if (kr == KERN_INVALID_ARGUMENT)
		return;

#ifdef MACH_BSD
	pid = proc_selfpid();
	if (task->bsd_info != NULL) {
		procname = proc_name_address(task->bsd_info);
	}
#endif

	thread_get_cpulimit(&action, &percentage, &interval_ns);

	interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);

	thread_read_times(thread, &thread_user_time, &thread_system_time);
	time_value_add(&thread_total_time, &thread_user_time);
	time_value_add(&thread_total_time, &thread_system_time);
	ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);

	/* credit/debit/balance/limit are in absolute time units;
	   the refill info is in nanoseconds. */
	absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
	if (lei.lei_last_refill > 0) {
		usage_percent = (uint32_t)((balance_ns * 100ULL) / lei.lei_last_refill);
	}

	/* TODO: show task total runtime (via TASK_ABSOLUTETIME_INFO)? */
	printf("process %s[%d] thread %llu caught burning CPU! "
	       "It used more than %d%% CPU over %u seconds "
	       "(actual recent usage: %d%% over ~%llu seconds). "
	       "Thread lifetime cpu usage %d.%06ds, (%d.%06d user, %d.%06d sys) "
	       "ledger balance: %lld mabs credit: %lld mabs debit: %lld mabs "
	       "limit: %llu mabs period: %llu ns last refill: %llu ns%s.\n",
	       procname, pid, tid,
	       percentage, interval_sec,
	       usage_percent,
	       (lei.lei_last_refill + NSEC_PER_SEC / 2) / NSEC_PER_SEC,
	       thread_total_time.seconds, thread_total_time.microseconds,
	       thread_user_time.seconds, thread_user_time.microseconds,
	       thread_system_time.seconds, thread_system_time.microseconds,
	       lei.lei_balance, lei.lei_credit, lei.lei_debit,
	       lei.lei_limit, lei.lei_refill_period, lei.lei_last_refill,
	       (fatal ? " [fatal violation]" : ""));

	/*
	   For now, send RESOURCE_NOTIFY in parallel with EXC_RESOURCE.  Once
	   we have logging parity, we will stop sending EXC_RESOURCE (24508922).
	*/

	/* RESOURCE_NOTIFY MIG specifies nanoseconds of CPU time */
	lei.lei_balance = balance_ns;
	absolutetime_to_nanoseconds(lei.lei_limit, &lei.lei_limit);
	trace_resource_violation(RMON_CPUUSAGE_VIOLATED, &lei);
	kr = send_resource_violation(send_cpu_usage_violation, task, &lei,
	                             fatal ? kRNFatalLimitFlag : 0);
	if (kr) {
		printf("send_resource_violation(CPU usage, ...): error %#x\n", kr);
	}

#ifdef EXC_RESOURCE_MONITORS
	if (send_exc_resource) {
		if (disable_exc_resource) {
			printf("process %s[%d] thread %llu caught burning CPU! "
			       "EXC_RESOURCE%s suppressed by a boot-arg\n",
			       procname, pid, tid, fatal ? " (and termination)" : "");
			return;
		}

		if (audio_active) {
			printf("process %s[%d] thread %llu caught burning CPU! "
			       "EXC_RESOURCE & termination suppressed due to audio playback\n",
			       procname, pid, tid);
			return;
		}
	}

	if (send_exc_resource) {
		code[0] = code[1] = 0;
		EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU);
		if (fatal) {
			EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR_FATAL);
		} else {
			EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR);
		}
		EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec);
		EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], percentage);
		EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent);
		exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
	}
#endif /* EXC_RESOURCE_MONITORS */

	if (fatal) {
#if CONFIG_JETSAM
		jetsam_on_ledger_cpulimit_exceeded();
#else
		task_terminate_internal(task);
#endif
	}
}
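/*
 * Worked example (illustrative, not part of the original file) of the
 * usage_percent calculation above: the ledger balance is converted from
 * absolute time units to nanoseconds, then expressed as a percentage of the
 * last refill period. With a 10-second refill period and 8 seconds of CPU
 * consumed since the refill, usage_percent = (8e9 * 100) / 10e9 = 80.
 * example_usage_percent() is a hypothetical helper mirroring that math.
 */
#if 0
static uint32_t
example_usage_percent(uint64_t balance_ns, uint64_t last_refill_ns)
{
	/* mirrors the lei_last_refill > 0 guard above */
	if (last_refill_ns == 0)
		return 0;
	return (uint32_t)((balance_ns * 100ULL) / last_refill_ns);
}
#endif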
void thread_update_io_stats(thread_t thread, int size, int io_flags)
{
	int io_tier;

	if (thread->thread_io_stats == NULL || thread->task->task_io_stats == NULL)
		return;

	if (io_flags & DKIO_READ) {
		UPDATE_IO_STATS(thread->thread_io_stats->disk_reads, size);
		UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->disk_reads, size);
	}

	if (io_flags & DKIO_META) {
		UPDATE_IO_STATS(thread->thread_io_stats->metadata, size);
		UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->metadata, size);
	}

	if (io_flags & DKIO_PAGING) {
		UPDATE_IO_STATS(thread->thread_io_stats->paging, size);
		UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->paging, size);
	}

	io_tier = ((io_flags & DKIO_TIER_MASK) >> DKIO_TIER_SHIFT);
	assert(io_tier < IO_NUM_PRIORITIES);

	UPDATE_IO_STATS(thread->thread_io_stats->io_priority[io_tier], size);
	UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->io_priority[io_tier], size);

	/* Update Total I/O Counts */
	UPDATE_IO_STATS(thread->thread_io_stats->total_io, size);
	UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->total_io, size);

	if (!(io_flags & DKIO_READ)) {
		DTRACE_IO3(physical_writes, struct task *, thread->task, uint32_t, size, int, io_flags);
		ledger_credit(thread->task->ledger, task_ledgers.physical_writes, size);
	}
}
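/*
 * Usage sketch (illustrative, not part of the original file): how a block-I/O
 * completion path might account a 16KB tier-2 metadata read against the
 * current thread with thread_update_io_stats(). The DKIO_* flag composition
 * (the same flags decoded above) is the part being demonstrated; the call
 * site itself is hypothetical.
 */
#if 0
static void
example_account_metadata_read(void)
{
	/* read + metadata, priority tier 2 packed into the tier field */
	int io_flags = DKIO_READ | DKIO_META |
	    ((2 << DKIO_TIER_SHIFT) & DKIO_TIER_MASK);

	thread_update_io_stats(current_thread(), 16384, io_flags);
}
#endif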
void
init_thread_ledgers(void)
{
	ledger_template_t t;
	int idx;

	assert(thread_ledger_template == NULL);

	if ((t = ledger_template_create("Per-thread ledger")) == NULL)
		panic("couldn't create thread ledger template");

	if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
		panic("couldn't create cpu_time entry for thread ledger template");
	}

	if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) {
		panic("couldn't set thread ledger callback for cpu_time entry");
	}

	thread_ledgers.cpu_time = idx;

	thread_ledger_template = t;
}
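/*
 * Usage sketch (illustrative, not part of the original file): how a per-thread
 * ledger is minted from the template built above, mirroring what
 * thread_set_cpulimit() below does on first use. All identifiers are from the
 * surrounding file; only the wrapper function is hypothetical.
 */
#if 0
static ledger_t
example_make_thread_ledger(void)
{
	ledger_t l;

	/* entries start inactive; activate only the cpu_time entry */
	l = ledger_instantiate(thread_ledger_template,
	    LEDGER_CREATE_INACTIVE_ENTRIES);
	if (l != LEDGER_NULL)
		ledger_entry_setactive(l, thread_ledgers.cpu_time);
	return l;
}
#endif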
/*
 * Returns currently applied CPU usage limit, or 0/0 if none is applied.
 */
int
thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns)
{
	int64_t  abstime   = 0;
	uint64_t limittime = 0;
	thread_t thread    = current_thread();

	*percentage  = 0;
	*interval_ns = 0;
	*action      = 0;

	if (thread->t_threadledger == LEDGER_NULL) {
		/*
		 * This thread has no per-thread ledger, so it can't possibly
		 * have a CPU limit applied.
		 */
		return (KERN_SUCCESS);
	}

	ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns);
	ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime);

	if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) {
		/*
		 * This thread's CPU time ledger has no period or limit; so it
		 * doesn't have a CPU limit applied.
		 */
		return (KERN_SUCCESS);
	}

	/*
	 * This calculation is the converse to the one in thread_set_cpulimit().
	 */
	absolutetime_to_nanoseconds(abstime, &limittime);
	*percentage = (limittime * 100ULL) / *interval_ns;
	assert(*percentage <= 100);

	if (thread->options & TH_OPT_PROC_CPULIMIT) {
		assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0);

		*action = THREAD_CPULIMIT_BLOCK;
	} else if (thread->options & TH_OPT_PRVT_CPULIMIT) {
		assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0);

		*action = THREAD_CPULIMIT_EXCEPTION;
	} else {
		*action = THREAD_CPULIMIT_DISABLE;
	}

	return (KERN_SUCCESS);
}
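/*
 * Usage sketch (illustrative, not part of the original file): reading back the
 * calling thread's CPU limit. A percentage/interval of 0/0 means no limit is
 * in force; the action value distinguishes which kind of limit is armed. The
 * wrapper function is hypothetical.
 */
#if 0
static boolean_t
example_has_cpulimit(void)
{
	int      action;
	uint8_t  percentage;
	uint64_t interval_ns;

	thread_get_cpulimit(&action, &percentage, &interval_ns);
	return (percentage != 0 && interval_ns != 0) ? TRUE : FALSE;
}
#endif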
/*
 * Set CPU usage limit on a thread.
 *
 * Calling with percentage of 0 will unset the limit for this thread.
 */
int
thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
{
	thread_t thread    = current_thread();
	ledger_t l;
	uint64_t limittime = 0;
	uint64_t abstime   = 0;

	assert(percentage <= 100);

	if (action == THREAD_CPULIMIT_DISABLE) {
		/*
		 * Remove CPU limit, if any exists.
		 */
		if (thread->t_threadledger != LEDGER_NULL) {
			l = thread->t_threadledger;
			ledger_set_limit(l, thread_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
			ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_IGNORE);
			thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT);
		}

		return (0);
	}

	if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) {
		return (KERN_INVALID_ARGUMENT);
	}

	l = thread->t_threadledger;
	if (l == LEDGER_NULL) {
		/*
		 * This thread doesn't yet have a per-thread ledger; so create one with the CPU time entry active.
		 */
		if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL)
			return (KERN_RESOURCE_SHORTAGE);

		/*
		 * We are the first to create this thread's ledger, so only activate our entry.
		 */
		ledger_entry_setactive(l, thread_ledgers.cpu_time);
		thread->t_threadledger = l;
	}

	/*
	 * The limit is specified as a percentage of CPU over an interval in nanoseconds.
	 * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
	 */
	limittime = (interval_ns * percentage) / 100;
	nanoseconds_to_absolutetime(limittime, &abstime);
	ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct);
	/*
	 * Refill the thread's allotted CPU time every interval_ns nanoseconds.
	 */
	ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);

	if (action == THREAD_CPULIMIT_EXCEPTION) {
		/*
		 * We don't support programming the CPU usage monitor on a task if any of its
		 * threads have a per-thread blocking CPU limit configured.
		 */
		if (thread->options & TH_OPT_PRVT_CPULIMIT) {
			panic("CPU usage monitor activated, but blocking thread limit exists");
		}

		/*
		 * Make a note that this thread's CPU limit is being used for the task-wide CPU
		 * usage monitor. We don't have to arm the callback which will trigger the
		 * exception, because that was done for us in ledger_instantiate (because the
		 * ledger template used has a default callback).
		 */
		thread->options |= TH_OPT_PROC_CPULIMIT;
	} else {
		/*
		 * We deliberately override any CPU limit imposed by a task-wide limit (eg
		 * CPU usage monitor).
		 */
		thread->options &= ~TH_OPT_PROC_CPULIMIT;

		thread->options |= TH_OPT_PRVT_CPULIMIT;
		/* The per-thread ledger template by default has a callback for CPU time */
		ledger_disable_callback(l, thread_ledgers.cpu_time);
		ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
	}

	return (0);
}
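/*
 * Usage sketch (illustrative, not part of the original file): arming a
 * blocking 50%-over-100ms limit on the calling thread. Internally this becomes
 * limittime = (100ms * 50) / 100 = 50ms of CPU per refill period, converted to
 * absolute time units by the code above. The wrapper is hypothetical.
 */
#if 0
static int
example_block_at_half_cpu(void)
{
	/* interval must be at least MINIMUM_CPULIMIT_INTERVAL_MS */
	return thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, 50,
	    100 * NSEC_PER_MSEC);
}
#endif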
static void
sched_call_null(
__unused	int		type,
__unused	thread_t	thread)
{
	;
}

void
thread_sched_call(
	thread_t	thread,
	sched_call_t	call)
{
	thread->sched_call = (call != NULL) ? call : sched_call_null;
}
sched_call_t
thread_disable_sched_call(
	thread_t	thread,
	sched_call_t	call)
{
	if (call) {
		spl_t s = splsched();
		thread_lock(thread);
		if (thread->sched_call == call) {
			thread->sched_call = sched_call_null;
		} else {
			call = NULL;
		}
		thread_unlock(thread);
		splx(s);
	}
	return call;
}
void
thread_reenable_sched_call(
	thread_t	thread,
	sched_call_t	call)
{
	if (call) {
		spl_t s = splsched();
		thread_lock(thread);
		thread_sched_call(thread, call);
		thread_unlock(thread);
		splx(s);
	}
}
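/*
 * Usage sketch (illustrative, not part of the original file): the
 * disable/reenable pair above is meant to bracket a region where sched_call
 * callbacks must not fire. thread_disable_sched_call() returns the call it
 * actually removed (or NULL), which is what gets handed back to
 * thread_reenable_sched_call(); passing NULL back is a safe no-op. The
 * bracketed region below is hypothetical.
 */
#if 0
static void
example_quiesce_sched_call(thread_t thread, sched_call_t call)
{
	sched_call_t disabled = thread_disable_sched_call(thread, call);

	/* ... work that must not race with the callback ... */

	thread_reenable_sched_call(thread, disabled);
}
#endif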
void
thread_static_param(
	thread_t	thread,
	boolean_t	state)
{
	thread_mtx_lock(thread);
	thread->static_param = state;
	thread_mtx_unlock(thread);
}
uint64_t
thread_tid(
	thread_t	thread)
{
	return (thread != THREAD_NULL ? thread->thread_id : 0);
}
uint16_t thread_set_tag(thread_t th, uint16_t tag) {
	return thread_set_tag_internal(th, tag);
}

uint16_t thread_get_tag(thread_t th) {
	return thread_get_tag_internal(th);
}
uint64_t
thread_dispatchqaddr(
	thread_t	thread)
{
	uint64_t	dispatchqueue_addr;
	uint64_t	thread_handle;

	if (thread == THREAD_NULL)
		return 0;

	thread_handle = thread->machine.cthread_self;
	if (thread_handle == 0)
		return 0;

	if (thread->inspection == TRUE)
		dispatchqueue_addr = thread_handle + get_task_dispatchqueue_offset(thread->task);
	else if (thread->task->bsd_info)
		dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
	else
		dispatchqueue_addr = 0;

	return dispatchqueue_addr;
}
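/*
 * Note (illustrative, not part of the original file): the value computed above
 * is a user-space address, not kernel memory. It is the thread's TSD base
 * (machine.cthread_self) plus the per-process offset at which libdispatch
 * keeps the thread's queue pointer, so a debugger-style consumer would read
 * user memory at that address to find the queue. The wrapper below only shows
 * the zero-checking call pattern and is hypothetical.
 */
#if 0
static uint64_t
example_queue_slot_addr(thread_t thread)
{
	uint64_t addr = thread_dispatchqaddr(thread);

	/* 0 means no thread handle or no registered dispatch queue offset */
	return addr;
}
#endif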
/*
 * Export routines to other components for things that are done as macros
 * within the osfmk component.
 */

#undef thread_reference
void thread_reference(thread_t thread);
void
thread_reference(
	thread_t	thread)
{
	if (thread != THREAD_NULL)
		thread_reference_internal(thread);
}

#undef thread_should_halt

boolean_t
thread_should_halt(
	thread_t	th)
{
	return (thread_should_halt_fast(th));
}
/*
 * thread_set_voucher_name - reset the voucher port name bound to this thread
 *
 * Conditions:  nothing locked
 *
 * If we already converted the previous name to a cached voucher
 * reference, then we discard that reference here.  The next lookup
 * will cache it again.
 */
kern_return_t
thread_set_voucher_name(mach_port_name_t voucher_name)
{
	thread_t thread = current_thread();
	ipc_voucher_t new_voucher = IPC_VOUCHER_NULL;
	ipc_voucher_t voucher;
#ifdef CONFIG_BANK
	ledger_t bankledger = NULL;
#endif

	if (MACH_PORT_DEAD == voucher_name)
		return KERN_INVALID_RIGHT;

	/*
	 * aggressively convert to voucher reference
	 */
	if (MACH_PORT_VALID(voucher_name)) {
		new_voucher = convert_port_name_to_voucher(voucher_name);
		if (IPC_VOUCHER_NULL == new_voucher)
			return KERN_INVALID_ARGUMENT;
	}
#ifdef CONFIG_BANK
	bankledger = bank_get_voucher_ledger(new_voucher);
#endif

	thread_mtx_lock(thread);
	voucher = thread->ith_voucher;
	thread->ith_voucher_name = voucher_name;
	thread->ith_voucher = new_voucher;
#ifdef CONFIG_BANK
	bank_swap_thread_bank_ledger(thread, bankledger);
#endif
	thread_mtx_unlock(thread);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				  MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
				  (uintptr_t)thread_tid(thread),
				  (uintptr_t)voucher_name,
				  VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
				  1, 0);

	if (IPC_VOUCHER_NULL != voucher)
		ipc_voucher_release(voucher);

	return KERN_SUCCESS;
}
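/*
 * Usage sketch (illustrative, not part of the original file): unbinding the
 * calling thread's voucher by name. MACH_PORT_NULL is not MACH_PORT_VALID, so
 * no new voucher reference is taken, and any previously cached voucher
 * reference on the thread is released by the code above. The wrapper is
 * hypothetical.
 */
#if 0
static kern_return_t
example_unbind_voucher(void)
{
	return thread_set_voucher_name(MACH_PORT_NULL);
}
#endif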
/*
 * thread_get_mach_voucher - return a voucher reference for the specified thread voucher
 *
 * Conditions:  nothing locked
 *
 * A reference to the voucher may be lazily pending, if someone set the voucher name
 * but nobody has done a lookup yet.  In that case, we'll have to do the equivalent
 * lookup here.
 *
 * NOTE: At the moment, there is no distinction between the current and effective
 * vouchers because we only set them at the thread level currently.
 */
kern_return_t
thread_get_mach_voucher(
	thread_act_t		thread,
	mach_voucher_selector_t	__unused which,
	ipc_voucher_t		*voucherp)
{
	ipc_voucher_t		voucher;
	mach_port_name_t	voucher_name;

	if (THREAD_NULL == thread)
		return KERN_INVALID_ARGUMENT;

	thread_mtx_lock(thread);
	voucher = thread->ith_voucher;

	/* if already cached, just return a ref */
	if (IPC_VOUCHER_NULL != voucher) {
		ipc_voucher_reference(voucher);
		thread_mtx_unlock(thread);
		*voucherp = voucher;
		return KERN_SUCCESS;
	}

	voucher_name = thread->ith_voucher_name;

	/* convert the name to a port, then voucher reference */
	if (MACH_PORT_VALID(voucher_name)) {
		ipc_port_t port;

		if (KERN_SUCCESS !=
		    ipc_object_copyin(thread->task->itk_space, voucher_name,
				      MACH_MSG_TYPE_COPY_SEND, (ipc_object_t *)&port)) {
			thread->ith_voucher_name = MACH_PORT_NULL;
			thread_mtx_unlock(thread);
			*voucherp = IPC_VOUCHER_NULL;
			return KERN_SUCCESS;
		}

		/* convert to a voucher ref to return, and cache a ref on thread */
		voucher = convert_port_to_voucher(port);
		ipc_voucher_reference(voucher);
		thread->ith_voucher = voucher;
		thread_mtx_unlock(thread);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
					  MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
					  (uintptr_t)thread_tid(thread),
					  (uintptr_t)voucher_name,
					  VM_KERNEL_ADDRPERM((uintptr_t)voucher),
					  2, 0);

		ipc_port_release_send(port);
	} else {
		thread_mtx_unlock(thread);
	}

	*voucherp = voucher;
	return KERN_SUCCESS;
}
/*
 * thread_set_mach_voucher - set a voucher reference for the specified thread voucher
 *
 * Conditions:  caller holds a reference on the voucher.
 *		nothing locked.
 *
 * We grab another reference to the voucher and bind it to the thread.  Any lazy
 * binding is erased.  The old voucher reference associated with the thread is
 * discarded.
 */
kern_return_t
thread_set_mach_voucher(
	thread_t		thread,
	ipc_voucher_t		voucher)
{
	ipc_voucher_t old_voucher;
#ifdef CONFIG_BANK
	ledger_t bankledger = NULL;
#endif

	if (THREAD_NULL == thread)
		return KERN_INVALID_ARGUMENT;

	if (thread != current_thread() || thread->started)
		return KERN_INVALID_ARGUMENT;

	ipc_voucher_reference(voucher);
#ifdef CONFIG_BANK
	bankledger = bank_get_voucher_ledger(voucher);
#endif
	thread_mtx_lock(thread);
	old_voucher = thread->ith_voucher;
	thread->ith_voucher = voucher;
	thread->ith_voucher_name = MACH_PORT_NULL;
#ifdef CONFIG_BANK
	bank_swap_thread_bank_ledger(thread, bankledger);
#endif
	thread_mtx_unlock(thread);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				  MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
				  (uintptr_t)thread_tid(thread),
				  (uintptr_t)MACH_PORT_NULL,
				  VM_KERNEL_ADDRPERM((uintptr_t)voucher),
				  3, 0);

	ipc_voucher_release(old_voucher);

	return KERN_SUCCESS;
}
/*
 * thread_swap_mach_voucher - swap a voucher reference for the specified thread voucher
 *
 * Conditions:  caller holds a reference on the new and presumed old voucher(s).
 *		nothing locked.
 *
 * If the old voucher is still the same as passed in, replace it with new voucher
 * and discard the old (and the reference passed in).  Otherwise, discard the new
 * and return an updated old voucher.
 */
kern_return_t
thread_swap_mach_voucher(
	thread_t		thread,
	ipc_voucher_t		new_voucher,
	ipc_voucher_t		*in_out_old_voucher)
{
	mach_port_name_t old_voucher_name;
	ipc_voucher_t old_voucher;
#ifdef CONFIG_BANK
	ledger_t bankledger = NULL;
#endif

	if (THREAD_NULL == thread)
		return KERN_INVALID_TASK;

	if (thread != current_thread() || thread->started)
		return KERN_INVALID_ARGUMENT;

#ifdef CONFIG_BANK
	bankledger = bank_get_voucher_ledger(new_voucher);
#endif

	thread_mtx_lock(thread);

	old_voucher = thread->ith_voucher;

	if (IPC_VOUCHER_NULL == old_voucher) {
		old_voucher_name = thread->ith_voucher_name;

		/* perform lazy binding if needed */
		if (MACH_PORT_VALID(old_voucher_name)) {
			old_voucher = convert_port_name_to_voucher(old_voucher_name);
			thread->ith_voucher_name = MACH_PORT_NULL;
			thread->ith_voucher = old_voucher;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
						  MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
						  (uintptr_t)thread_tid(thread),
						  (uintptr_t)old_voucher_name,
						  VM_KERNEL_ADDRPERM((uintptr_t)old_voucher),
						  4, 0);
		}
	}

	/* swap in new voucher, if old voucher matches the one supplied */
	if (old_voucher == *in_out_old_voucher) {
		ipc_voucher_reference(new_voucher);
		thread->ith_voucher = new_voucher;
		thread->ith_voucher_name = MACH_PORT_NULL;
#ifdef CONFIG_BANK
		bank_swap_thread_bank_ledger(thread, bankledger);
#endif
		thread_mtx_unlock(thread);

		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
					  MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
					  (uintptr_t)thread_tid(thread),
					  (uintptr_t)MACH_PORT_NULL,
					  VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
					  5, 0);

		ipc_voucher_release(old_voucher);

		*in_out_old_voucher = IPC_VOUCHER_NULL;
		return KERN_SUCCESS;
	}

	/* Otherwise, just return old voucher reference */
	ipc_voucher_reference(old_voucher);
	thread_mtx_unlock(thread);
	*in_out_old_voucher = old_voucher;
	return KERN_SUCCESS;
}
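/*
 * Usage sketch (illustrative, not part of the original file): the swap call
 * above behaves like a compare-and-swap on the thread's bound voucher. The
 * caller passes the voucher it believes is bound via in_out_old_voucher; on a
 * match the new voucher is installed and IPC_VOUCHER_NULL comes back, on a
 * mismatch nothing is installed and the live voucher comes back with a
 * reference for the caller to examine. The wrapper is hypothetical.
 */
#if 0
static boolean_t
example_try_swap_voucher(thread_t thread, ipc_voucher_t new_voucher,
    ipc_voucher_t expected)
{
	ipc_voucher_t old = expected;

	if (thread_swap_mach_voucher(thread, new_voucher, &old) != KERN_SUCCESS)
		return FALSE;

	if (old == IPC_VOUCHER_NULL)
		return TRUE;	/* swapped in */

	/* lost the race; 'old' holds a reference on the live voucher */
	ipc_voucher_release(old);
	return FALSE;
}
#endif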
/*
 * thread_get_current_voucher_origin_pid - get the pid of the originator of the current voucher.
 */
kern_return_t
thread_get_current_voucher_origin_pid(
	int32_t	*pid)
{
	uint32_t buf_size;
	kern_return_t kr;
	thread_t thread = current_thread();

	buf_size = sizeof(*pid);
	kr = mach_voucher_attr_command(thread->ith_voucher,
				       MACH_VOUCHER_ATTR_KEY_BANK,
				       BANK_ORIGINATOR_PID,
				       NULL,
				       0,
				       (mach_voucher_attr_content_t)pid,
				       &buf_size);

	return kr;
}
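/*
 * Usage sketch (illustrative, not part of the original file): the query above
 * is a thin wrapper over mach_voucher_attr_command() against the bank
 * attribute manager; BANK_ORIGINATOR_PID fills in the pid of the process that
 * created the bound voucher. The caller below is hypothetical.
 */
#if 0
static void
example_log_voucher_origin(void)
{
	int32_t pid = -1;

	if (thread_get_current_voucher_origin_pid(&pid) == KERN_SUCCESS)
		printf("current voucher originated in pid %d\n", pid);
}
#endif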
boolean_t
thread_has_thread_name(thread_t th)
{
	if ((th) && (th->uthread)) {
		return bsd_hasthreadname(th->uthread);
	}

	/*
	 * This is an odd case; clients may set the thread name based on the lack of
	 * a name, but in this context there is no uthread to attach the name to.
	 */
	return FALSE;
}

void
thread_set_thread_name(thread_t th, const char* name)
{
	if ((th) && (th->uthread) && name) {
		bsd_setthreadname(th->uthread, name);
	}
}
/*
 * thread_enable_send_importance - set/clear the SEND_IMPORTANCE thread option bit.
 */
void thread_enable_send_importance(thread_t thread, boolean_t enable)
{
	if (enable == TRUE)
		thread->options |= TH_OPT_SEND_IMPORTANCE;
	else
		thread->options &= ~TH_OPT_SEND_IMPORTANCE;
}
#if CONFIG_DTRACE
uint32_t dtrace_get_thread_predcache(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_predcache;
	else
		return 0;
}

int64_t dtrace_get_thread_vtime(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_vtime;
	else
		return 0;
}

int dtrace_get_thread_last_cpu_id(thread_t thread)
{
	if ((thread != THREAD_NULL) && (thread->last_processor != PROCESSOR_NULL)) {
		return thread->last_processor->cpu_id;
	} else {
		return -1;
	}
}

int64_t dtrace_get_thread_tracing(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->t_dtrace_tracing;
	else
		return 0;
}

boolean_t dtrace_get_thread_reentering(thread_t thread)
{
	if (thread != THREAD_NULL)
		return (thread->options & TH_OPT_DTRACE) ? TRUE : FALSE;
	else
		return FALSE;
}

vm_offset_t dtrace_get_kernel_stack(thread_t thread)
{
	if (thread != THREAD_NULL)
		return thread->kernel_stack;
	else
		return 0;
}

int64_t dtrace_calc_thread_recent_vtime(thread_t thread)
{
	if (thread != THREAD_NULL) {
		processor_t processor = current_processor();
		uint64_t    abstime   = mach_absolute_time();
		timer_t     timer;

		timer = PROCESSOR_DATA(processor, thread_timer);

		return timer_grab(&(thread->system_timer)) + timer_grab(&(thread->user_timer)) +
		       (abstime - timer->tstamp); /* XXX need interrupts off to prevent missed time? */
	} else
		return 0;
}

void dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_predcache = predcache;
}

void dtrace_set_thread_vtime(thread_t thread, int64_t vtime)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_vtime = vtime;
}

void dtrace_set_thread_tracing(thread_t thread, int64_t accum)
{
	if (thread != THREAD_NULL)
		thread->t_dtrace_tracing = accum;
}

void dtrace_set_thread_reentering(thread_t thread, boolean_t vbool)
{
	if (thread != THREAD_NULL) {
		if (vbool)
			thread->options |= TH_OPT_DTRACE;
		else
			thread->options &= (~TH_OPT_DTRACE);
	}
}

vm_offset_t dtrace_set_thread_recover(thread_t thread, vm_offset_t recover)
{
	vm_offset_t prev = 0;

	if (thread != THREAD_NULL) {
		prev = thread->recover;
		thread->recover = recover;
	}
	return prev;
}

void dtrace_thread_bootstrap(void)
{
	task_t task = current_task();

	if (task->thread_count == 1) {
		thread_t thread = current_thread();
		if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) {
			thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS;
			DTRACE_PROC(exec__success);
			KDBG(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXEC),
			     task_pid(task));
		}
		DTRACE_PROC(start);
	}
	DTRACE_PROC(lwp__start);
}

void
dtrace_thread_didexec(thread_t thread)
{
	thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS;
}
#endif /* CONFIG_DTRACE */