]> git.saurik.com Git - apple/xnu.git/blob - osfmk/kern/thread.c
xnu-2422.115.4.tar.gz
[apple/xnu.git] / osfmk / kern / thread.c
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: kern/thread.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub
61 * Date: 1986
62 *
63 * Thread management primitives implementation.
64 */
65 /*
66 * Copyright (c) 1993 The University of Utah and
67 * the Computer Systems Laboratory (CSL). All rights reserved.
68 *
69 * Permission to use, copy, modify and distribute this software and its
70 * documentation is hereby granted, provided that both the copyright
71 * notice and this permission notice appear in all copies of the
72 * software, derivative works or modified versions, and any portions
73 * thereof, and that both notices appear in supporting documentation.
74 *
75 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
76 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
77 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
78 *
79 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
80 * improvements that they make and grant CSL redistribution rights.
81 *
82 */
83
84 #include <mach/mach_types.h>
85 #include <mach/boolean.h>
86 #include <mach/policy.h>
87 #include <mach/thread_info.h>
88 #include <mach/thread_special_ports.h>
89 #include <mach/thread_status.h>
90 #include <mach/time_value.h>
91 #include <mach/vm_param.h>
92
93 #include <machine/thread.h>
94 #include <machine/pal_routines.h>
95 #include <machine/limits.h>
96
97 #include <kern/kern_types.h>
98 #include <kern/kalloc.h>
99 #include <kern/cpu_data.h>
100 #include <kern/counters.h>
101 #include <kern/extmod_statistics.h>
102 #include <kern/ipc_mig.h>
103 #include <kern/ipc_tt.h>
104 #include <kern/mach_param.h>
105 #include <kern/machine.h>
106 #include <kern/misc_protos.h>
107 #include <kern/processor.h>
108 #include <kern/queue.h>
109 #include <kern/sched.h>
110 #include <kern/sched_prim.h>
111 #include <kern/sync_lock.h>
112 #include <kern/syscall_subr.h>
113 #include <kern/task.h>
114 #include <kern/thread.h>
115 #include <kern/host.h>
116 #include <kern/zalloc.h>
117 #include <kern/assert.h>
118 #include <kern/exc_resource.h>
119 #include <kern/telemetry.h>
120 #if KPC
121 #include <kern/kpc.h>
122 #endif
123
124 #include <ipc/ipc_kmsg.h>
125 #include <ipc/ipc_port.h>
126
127 #include <vm/vm_kern.h>
128 #include <vm/vm_pageout.h>
129
130 #include <sys/kdebug.h>
131
132 #include <mach/sdt.h>
133
134 /*
135 * Exported interfaces
136 */
137 #include <mach/task_server.h>
138 #include <mach/thread_act_server.h>
139 #include <mach/mach_host_server.h>
140 #include <mach/host_priv_server.h>
141
/* Zone from which struct thread is allocated; created by thread_init(). */
142 static struct zone *thread_zone;
143 static lck_grp_attr_t thread_lck_grp_attr;
/* Lock attribute and lock group used for every thread's mutex. */
144 lck_attr_t thread_lck_attr;
145 lck_grp_t thread_lck_grp;
146
/* Threads queued for thread_stack_daemon(); the queue is guarded by thread_stack_lock. */
147 decl_simple_lock_data(static,thread_stack_lock)
148 static queue_head_t thread_stack_queue;
149
/* Threads queued for thread_terminate_daemon(); the queue is guarded by thread_terminate_lock. */
150 decl_simple_lock_data(static,thread_terminate_lock)
151 static queue_head_t thread_terminate_queue;
152
/*
 * thread_template holds the default field values that are copied into each
 * newly allocated thread.  init_thread is a statically allocated copy of the
 * template that thread_bootstrap() installs as the current thread.
 */
153 static struct thread thread_template, init_thread;
154
/* Installed as thread_template.sched_call by thread_bootstrap(). */
155 static void sched_call_null(
156 int type,
157 thread_t thread);
158
159 #ifdef MACH_BSD
160 extern void proc_exit(void *);
161 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
162 extern int proc_selfpid(void);
163 extern char * proc_name_address(void *p);
164 #endif /* MACH_BSD */
165
166 extern int disable_exc_resource;
167 extern int audio_active;
168 extern int debug_task;
169 int thread_max = CONFIG_THREAD_MAX; /* Max number of threads */
/*
 * Per-task ceiling on thread_count enforced by thread_create_internal();
 * the kernel task is exempt from the check.
 */
170 int task_threadmax = CONFIG_THREAD_MAX;
171
/* Source of thread_id values; pre-incremented while tasks_threads_lock is held. */
172 static uint64_t thread_unique_id = 0;
173
174 struct _thread_ledger_indices thread_ledgers = { -1 };
175 static ledger_template_t thread_ledger_template = NULL;
176 void init_thread_ledgers(void);
177 int task_disable_cpumon(task_t task);
178
179 /*
180 * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry.
181 *
182 * (i.e., when any thread's CPU consumption exceeds 70% of the limit, start taking user
183 * stacktraces, aka micro-stackshots)
184 */
185 #define CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70
186
/* Set in thread_init() from the boot-arg of the same name, else the default above. */
187 int cpumon_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
188 void __attribute__((noinline)) THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(void);
189
190 /*
191 * The smallest interval over which we support limiting CPU consumption is 1ms
192 */
193 #define MINIMUM_CPULIMIT_INTERVAL_MS 1
194
195 void
196 thread_bootstrap(void)
197 {
198 /*
199 * Fill in a template thread for fast initialization.
200 */
201
202 thread_template.runq = PROCESSOR_NULL;
203
204 thread_template.ref_count = 2;
205
206 thread_template.reason = AST_NONE;
207 thread_template.at_safe_point = FALSE;
208 thread_template.wait_event = NO_EVENT64;
209 thread_template.wait_queue = WAIT_QUEUE_NULL;
210 thread_template.wait_result = THREAD_WAITING;
211 thread_template.options = THREAD_ABORTSAFE;
212 thread_template.state = TH_WAIT | TH_UNINT;
213 thread_template.wake_active = FALSE;
214 thread_template.continuation = THREAD_CONTINUE_NULL;
215 thread_template.parameter = NULL;
216
217 thread_template.importance = 0;
218 thread_template.sched_mode = TH_MODE_NONE;
219 thread_template.sched_flags = 0;
220 thread_template.saved_mode = TH_MODE_NONE;
221 thread_template.safe_release = 0;
222
223 thread_template.priority = 0;
224 thread_template.sched_pri = 0;
225 thread_template.max_priority = 0;
226 thread_template.task_priority = 0;
227 thread_template.promotions = 0;
228 thread_template.pending_promoter_index = 0;
229 thread_template.pending_promoter[0] =
230 thread_template.pending_promoter[1] = NULL;
231 thread_template.rwlock_count = 0;
232
233 thread_template.realtime.deadline = UINT64_MAX;
234
235 thread_template.current_quantum = 0;
236 thread_template.last_run_time = 0;
237 thread_template.last_quantum_refill_time = 0;
238
239 thread_template.computation_metered = 0;
240 thread_template.computation_epoch = 0;
241
242 #if defined(CONFIG_SCHED_TRADITIONAL)
243 thread_template.sched_stamp = 0;
244 thread_template.pri_shift = INT8_MAX;
245 thread_template.sched_usage = 0;
246 thread_template.cpu_usage = thread_template.cpu_delta = 0;
247 #endif
248 thread_template.c_switch = thread_template.p_switch = thread_template.ps_switch = 0;
249
250 thread_template.bound_processor = PROCESSOR_NULL;
251 thread_template.last_processor = PROCESSOR_NULL;
252
253 thread_template.sched_call = sched_call_null;
254
255 timer_init(&thread_template.user_timer);
256 timer_init(&thread_template.system_timer);
257 thread_template.user_timer_save = 0;
258 thread_template.system_timer_save = 0;
259 thread_template.vtimer_user_save = 0;
260 thread_template.vtimer_prof_save = 0;
261 thread_template.vtimer_rlim_save = 0;
262
263 thread_template.wait_timer_is_set = FALSE;
264 thread_template.wait_timer_active = 0;
265
266 thread_template.depress_timer_active = 0;
267
268 thread_template.special_handler.handler = special_handler;
269 thread_template.special_handler.next = NULL;
270
271 thread_template.funnel_lock = THR_FUNNEL_NULL;
272 thread_template.funnel_state = 0;
273 thread_template.recover = (vm_offset_t)NULL;
274
275 thread_template.map = VM_MAP_NULL;
276
277 #if CONFIG_DTRACE
278 thread_template.t_dtrace_predcache = 0;
279 thread_template.t_dtrace_vtime = 0;
280 thread_template.t_dtrace_tracing = 0;
281 #endif /* CONFIG_DTRACE */
282
283 #if KPC
284 thread_template.kpc_buf = NULL;
285 #endif
286
287 thread_template.t_chud = 0;
288 thread_template.t_page_creation_count = 0;
289 thread_template.t_page_creation_time = 0;
290
291 thread_template.affinity_set = NULL;
292
293 thread_template.syscalls_unix = 0;
294 thread_template.syscalls_mach = 0;
295
296 thread_template.t_ledger = LEDGER_NULL;
297 thread_template.t_threadledger = LEDGER_NULL;
298
299 thread_template.requested_policy = default_task_requested_policy;
300 thread_template.effective_policy = default_task_effective_policy;
301 thread_template.pended_policy = default_task_pended_policy;
302
303 thread_template.iotier_override = THROTTLE_LEVEL_NONE;
304
305 thread_template.thread_callout_interrupt_wakeups = thread_template.thread_callout_platform_idle_wakeups = 0;
306
307 thread_template.thread_timer_wakeups_bin_1 = thread_template.thread_timer_wakeups_bin_2 = 0;
308 thread_template.callout_woken_from_icontext = thread_template.callout_woken_from_platform_idle = 0;
309
310 thread_template.thread_tag = 0;
311
312 init_thread = thread_template;
313 machine_set_current_thread(&init_thread);
314 }
315
316 void
317 thread_init(void)
318 {
319 thread_zone = zinit(
320 sizeof(struct thread),
321 thread_max * sizeof(struct thread),
322 THREAD_CHUNK * sizeof(struct thread),
323 "threads");
324
325 lck_grp_attr_setdefault(&thread_lck_grp_attr);
326 lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr);
327 lck_attr_setdefault(&thread_lck_attr);
328
329 stack_init();
330
331 /*
332 * Initialize any machine-dependent
333 * per-thread structures necessary.
334 */
335 machine_thread_init();
336
337 if (!PE_parse_boot_argn("cpumon_ustackshots_trigger_pct", &cpumon_ustackshots_trigger_pct,
338 sizeof (cpumon_ustackshots_trigger_pct))) {
339 cpumon_ustackshots_trigger_pct = CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT;
340 }
341
342 init_thread_ledgers();
343 }
344
/*
 *	thread_terminate_continue:
 *
 *	Continuation handed to thread_block() by thread_terminate_self().
 *	It panics unconditionally if it is ever invoked.
 */
static void thread_terminate_continue(void)
{
	panic("thread_terminate_continue");
	/*NOTREACHED*/
}
351
352 /*
353 * thread_terminate_self:
 *
 * Teardown that the calling thread performs on itself (it operates on
 * current_thread()).  In order: disable the thread's IPC, cancel any
 * priority depression and drain its timer, clear the scheduler callout,
 * reset thread policy, clean up the uthread and drop the task's
 * active_thread_count (running proc_exit() when this was the last active
 * thread of a task with bsd_info), cancel and drain the wait timer,
 * release any reserved stack, and finally mark the thread TH_TERMINATE
 * and block.  The block uses thread_terminate_continue as continuation,
 * which panics, so this routine is not expected to return.
354 */
355 void
356 thread_terminate_self(void)
357 {
358 thread_t thread = current_thread();
359
360 task_t task;
361 spl_t s;
362 int threadcnt;
363
364 pal_thread_terminate_self(thread);
365
366 DTRACE_PROC(lwp__exit);
367
/* IPC is disabled while holding the thread mutex. */
368 thread_mtx_lock(thread);
369
370 ipc_thread_disable(thread);
371
372 thread_mtx_unlock(thread);
373
374 s = splsched();
375 thread_lock(thread);
376
377 /*
378 * Cancel priority depression, wait for concurrent expirations
379 * on other processors.
380 */
381 if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
382 thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
383
/* A successful cancel means the pending call will not run, so drop its count here. */
384 if (timer_call_cancel(&thread->depress_timer))
385 thread->depress_timer_active--;
386 }
387
/* Poll with the thread lock dropped and spl restored until no depress timer remains active. */
388 while (thread->depress_timer_active > 0) {
389 thread_unlock(thread);
390 splx(s);
391
392 delay(1);
393
394 s = splsched();
395 thread_lock(thread);
396 }
397
/* Clear the scheduler callout for this thread. */
398 thread_sched_call(thread, NULL);
399
400 thread_unlock(thread);
401 splx(s);
402
403 thread_policy_reset(thread);
404
405
406 task = thread->task;
407 uthread_cleanup(task, thread->uthread, task->bsd_info);
/* threadcnt is the value returned by the atomic decrement. */
408 threadcnt = hw_atomic_sub(&task->active_thread_count, 1);
409
410 /*
411 * If we are the last thread to terminate and the task is
412 * associated with a BSD process, perform BSD process exit.
413 */
414 if (threadcnt == 0 && task->bsd_info != NULL)
415 proc_exit(task->bsd_info);
416
417 uthread_cred_free(thread->uthread);
418
419 s = splsched();
420 thread_lock(thread);
421
422 /*
423 * Cancel wait timer, and wait for
424 * concurrent expirations.
425 */
426 if (thread->wait_timer_is_set) {
427 thread->wait_timer_is_set = FALSE;
428
429 if (timer_call_cancel(&thread->wait_timer))
430 thread->wait_timer_active--;
431 }
432
/* Same polling scheme as for the depress timer above. */
433 while (thread->wait_timer_active > 0) {
434 thread_unlock(thread);
435 splx(s);
436
437 delay(1);
438
439 s = splsched();
440 thread_lock(thread);
441 }
442
443 /*
444 * If there is a reserved stack, release it.
445 */
446 if (thread->reserved_stack != 0) {
447 stack_free_reserved(thread);
448 thread->reserved_stack = 0;
449 }
450
451 /*
452 * Mark thread as terminating, and block.
453 */
454 thread->state |= TH_TERMINATE;
455 thread_mark_wait_locked(thread, THREAD_UNINT);
456 assert(thread->promotions == 0);
457 assert(thread->rwlock_count == 0);
458 thread_unlock(thread);
/* No splx() here: the thread lock is dropped but the spl level raised above is kept. */
459 /* splsched */
460
461 thread_block((thread_continue_t)thread_terminate_continue);
462 /*NOTREACHED*/
463 }
464
465 void
466 thread_deallocate(
467 thread_t thread)
468 {
469 task_t task;
470
471 if (thread == THREAD_NULL)
472 return;
473
474 if (thread_deallocate_internal(thread) > 0)
475 return;
476
477 if(!(thread->state & TH_TERMINATE2))
478 panic("thread_deallocate: thread not properly terminated\n");
479
480 #if KPC
481 kpc_thread_destroy(thread);
482 #endif
483
484
485 ipc_thread_terminate(thread);
486
487 task = thread->task;
488
489 #ifdef MACH_BSD
490 {
491 void *ut = thread->uthread;
492
493 thread->uthread = NULL;
494 uthread_zone_free(ut);
495 }
496 #endif /* MACH_BSD */
497
498 if (thread->t_ledger)
499 ledger_dereference(thread->t_ledger);
500 if (thread->t_threadledger)
501 ledger_dereference(thread->t_threadledger);
502
503 if (thread->kernel_stack != 0)
504 stack_free(thread);
505
506 lck_mtx_destroy(&thread->mutex, &thread_lck_grp);
507 machine_thread_destroy(thread);
508
509 task_deallocate(task);
510
511 zfree(thread_zone, thread);
512 }
513
514 /*
515 * thread_terminate_daemon:
516 *
517 * Perform final clean up for terminating threads.
518 */
519 static void
520 thread_terminate_daemon(void)
521 {
522 thread_t self, thread;
523 task_t task;
524
525 self = current_thread();
526 self->options |= TH_OPT_SYSTEM_CRITICAL;
527
528 (void)splsched();
529 simple_lock(&thread_terminate_lock);
530
531 while ((thread = (thread_t)dequeue_head(&thread_terminate_queue)) != THREAD_NULL) {
532 simple_unlock(&thread_terminate_lock);
533 (void)spllo();
534
535 task = thread->task;
536
537 task_lock(task);
538 task->total_user_time += timer_grab(&thread->user_timer);
539 if (thread->precise_user_kernel_time) {
540 task->total_system_time += timer_grab(&thread->system_timer);
541 } else {
542 task->total_user_time += timer_grab(&thread->system_timer);
543 }
544
545 task->c_switch += thread->c_switch;
546 task->p_switch += thread->p_switch;
547 task->ps_switch += thread->ps_switch;
548
549 task->syscalls_unix += thread->syscalls_unix;
550 task->syscalls_mach += thread->syscalls_mach;
551
552 task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
553 task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
554 queue_remove(&task->threads, thread, thread_t, task_threads);
555 task->thread_count--;
556
557 /*
558 * If the task is being halted, and there is only one thread
559 * left in the task after this one, then wakeup that thread.
560 */
561 if (task->thread_count == 1 && task->halting)
562 thread_wakeup((event_t)&task->halting);
563
564 task_unlock(task);
565
566 lck_mtx_lock(&tasks_threads_lock);
567 queue_remove(&threads, thread, thread_t, threads);
568 threads_count--;
569 lck_mtx_unlock(&tasks_threads_lock);
570
571 thread_deallocate(thread);
572
573 (void)splsched();
574 simple_lock(&thread_terminate_lock);
575 }
576
577 assert_wait((event_t)&thread_terminate_queue, THREAD_UNINT);
578 simple_unlock(&thread_terminate_lock);
579 /* splsched */
580
581 self->options &= ~TH_OPT_SYSTEM_CRITICAL;
582 thread_block((thread_continue_t)thread_terminate_daemon);
583 /*NOTREACHED*/
584 }
585
586 /*
587 * thread_terminate_enqueue:
588 *
589 * Enqueue a terminating thread for final disposition.
590 *
591 * Called at splsched.
592 */
593 void
594 thread_terminate_enqueue(
595 thread_t thread)
596 {
597 simple_lock(&thread_terminate_lock);
598 enqueue_tail(&thread_terminate_queue, (queue_entry_t)thread);
599 simple_unlock(&thread_terminate_lock);
600
601 thread_wakeup((event_t)&thread_terminate_queue);
602 }
603
604 /*
605 * thread_stack_daemon:
606 *
607 * Perform stack allocation as required due to
608 * invoke failures.
609 */
610 static void
611 thread_stack_daemon(void)
612 {
613 thread_t thread;
614 spl_t s;
615
616 s = splsched();
617 simple_lock(&thread_stack_lock);
618
619 while ((thread = (thread_t)dequeue_head(&thread_stack_queue)) != THREAD_NULL) {
620 simple_unlock(&thread_stack_lock);
621 splx(s);
622
623 /* allocate stack with interrupts enabled so that we can call into VM */
624 stack_alloc(thread);
625
626 s = splsched();
627 thread_lock(thread);
628 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
629 thread_unlock(thread);
630
631 simple_lock(&thread_stack_lock);
632 }
633
634 assert_wait((event_t)&thread_stack_queue, THREAD_UNINT);
635 simple_unlock(&thread_stack_lock);
636 splx(s);
637
638 thread_block((thread_continue_t)thread_stack_daemon);
639 /*NOTREACHED*/
640 }
641
642 /*
643 * thread_stack_enqueue:
644 *
645 * Enqueue a thread for stack allocation.
646 *
647 * Called at splsched.
648 */
649 void
650 thread_stack_enqueue(
651 thread_t thread)
652 {
653 simple_lock(&thread_stack_lock);
654 enqueue_tail(&thread_stack_queue, (queue_entry_t)thread);
655 simple_unlock(&thread_stack_lock);
656
657 thread_wakeup((event_t)&thread_stack_queue);
658 }
659
660 void
661 thread_daemon_init(void)
662 {
663 kern_return_t result;
664 thread_t thread = NULL;
665
666 simple_lock_init(&thread_terminate_lock, 0);
667 queue_init(&thread_terminate_queue);
668
669 result = kernel_thread_start_priority((thread_continue_t)thread_terminate_daemon, NULL, MINPRI_KERNEL, &thread);
670 if (result != KERN_SUCCESS)
671 panic("thread_daemon_init: thread_terminate_daemon");
672
673 thread_deallocate(thread);
674
675 simple_lock_init(&thread_stack_lock, 0);
676 queue_init(&thread_stack_queue);
677
678 result = kernel_thread_start_priority((thread_continue_t)thread_stack_daemon, NULL, BASEPRI_PREEMPT, &thread);
679 if (result != KERN_SUCCESS)
680 panic("thread_daemon_init: thread_stack_daemon");
681
682 thread_deallocate(thread);
683 }
684
685 /*
686 * Create a new thread.
687 * Doesn't start the thread running.
 *
 * parent_task   task the thread is created in
 * priority      requested priority; a negative value means "use the
 *               task's priority".  The result is capped at the task's
 *               max_priority.
 * continuation  stored as the new thread's continuation
 * options       TH_OPTION_* flags defined in the parameter list below
 * out_thread    receives the new thread on success
 *
 * Returns KERN_RESOURCE_SHORTAGE when the thread or its uthread cannot
 * be allocated, and KERN_FAILURE when machine_thread_create() fails or
 * the task cannot accept a new thread.
 *
 * LOCKING: on KERN_SUCCESS this routine returns with tasks_threads_lock
 * and the parent task's lock STILL HELD; the caller must release both.
 * On every failure path neither lock is held on return.
688 */
689 static kern_return_t
690 thread_create_internal(
691 task_t parent_task,
692 integer_t priority,
693 thread_continue_t continuation,
694 int options,
695 #define TH_OPTION_NONE 0x00
696 #define TH_OPTION_NOCRED 0x01
697 #define TH_OPTION_NOSUSP 0x02
698 thread_t *out_thread)
699 {
700 thread_t new_thread;
701 static thread_t first_thread;
702
703 /*
704 * Allocate a thread and initialize static fields
705 */
/* The very first call adopts current_thread() instead of allocating from the zone. */
706 if (first_thread == THREAD_NULL)
707 new_thread = first_thread = current_thread();
708 else
709 new_thread = (thread_t)zalloc(thread_zone);
710 if (new_thread == THREAD_NULL)
711 return (KERN_RESOURCE_SHORTAGE);
712
/* The adopted first thread is not overwritten with the template. */
713 if (new_thread != first_thread)
714 *new_thread = thread_template;
715
716 #ifdef MACH_BSD
/* TH_OPTION_NOCRED is forwarded to uthread_alloc() as its third argument. */
717 new_thread->uthread = uthread_alloc(parent_task, new_thread, (options & TH_OPTION_NOCRED) != 0);
718 if (new_thread->uthread == NULL) {
719 zfree(thread_zone, new_thread);
720 return (KERN_RESOURCE_SHORTAGE);
721 }
722 #endif /* MACH_BSD */
723
724 if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
725 #ifdef MACH_BSD
726 void *ut = new_thread->uthread;
727
728 new_thread->uthread = NULL;
729 /* cred free may not be necessary */
730 uthread_cleanup(parent_task, ut, parent_task->bsd_info);
731 uthread_cred_free(ut);
732 uthread_zone_free(ut);
733 #endif /* MACH_BSD */
734
735 zfree(thread_zone, new_thread);
736 return (KERN_FAILURE);
737 }
738
739 new_thread->task = parent_task;
740
741 thread_lock_init(new_thread);
742 wake_lock_init(new_thread);
743
744 lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr);
745
746 ipc_thread_init(new_thread);
747
748 new_thread->continuation = continuation;
749
/* Lock order: tasks_threads_lock first, then the task lock. */
750 lck_mtx_lock(&tasks_threads_lock);
751 task_lock(parent_task);
752
/*
 * Refuse the new thread when the task is inactive or halting, when
 * TH_OPTION_NOSUSP was given and the task is suspended, or when a
 * non-kernel task has reached task_threadmax threads.
 */
753 if ( !parent_task->active || parent_task->halting ||
754 ((options & TH_OPTION_NOSUSP) != 0 &&
755 parent_task->suspend_count > 0) ||
756 (parent_task->thread_count >= task_threadmax &&
757 parent_task != kernel_task) ) {
758 task_unlock(parent_task);
759 lck_mtx_unlock(&tasks_threads_lock);
760
761 #ifdef MACH_BSD
762 {
763 void *ut = new_thread->uthread;
764
765 new_thread->uthread = NULL;
766 uthread_cleanup(parent_task, ut, parent_task->bsd_info);
767 /* cred free may not be necessary */
768 uthread_cred_free(ut);
769 uthread_zone_free(ut);
770 }
771 #endif /* MACH_BSD */
/* Undo everything that was set up above before freeing the thread. */
772 ipc_thread_disable(new_thread);
773 ipc_thread_terminate(new_thread);
774 lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp);
775 machine_thread_destroy(new_thread);
776 zfree(thread_zone, new_thread);
777 return (KERN_FAILURE);
778 }
779
780 /* New threads inherit any default state on the task */
781 machine_thread_inherit_taskwide(new_thread, parent_task);
782
/* The thread takes a reference on its task. */
783 task_reference_internal(parent_task);
784
785 if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
786 /*
787 * This task has a per-thread CPU limit; make sure this new thread
788 * gets its limit set too, before it gets out of the kernel.
789 */
790 set_astledger(new_thread);
791 }
792 new_thread->t_threadledger = LEDGER_NULL; /* per thread ledger is not inherited */
/* The task's ledger is shared with the thread, which takes its own reference on it. */
793 new_thread->t_ledger = new_thread->task->ledger;
794 if (new_thread->t_ledger)
795 ledger_reference(new_thread->t_ledger);
796
797 /* Cache the task's map */
798 new_thread->map = parent_task->map;
799
800 /* Chain the thread onto the task's list */
801 queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
802 parent_task->thread_count++;
803
804 /* So terminating threads don't need to take the task lock to decrement */
805 hw_atomic_add(&parent_task->active_thread_count, 1);
806
807 /* Protected by the tasks_threads_lock */
808 new_thread->thread_id = ++thread_unique_id;
809
/* Add the thread to the global thread list and bump the global count. */
810 queue_enter(&threads, new_thread, thread_t, threads);
811 threads_count++;
812
813 timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
814 timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);
815
816 #if CONFIG_COUNTERS
817 /*
818 * If parent task has any reservations, they need to be propagated to this
819 * thread.
820 */
821 new_thread->t_chud = (TASK_PMC_FLAG == (parent_task->t_chud & TASK_PMC_FLAG)) ?
822 THREAD_PMC_FLAG : 0U;
823 #endif
824 #if KPC
825 kpc_thread_create(new_thread);
826 #endif
827
828 /* Only need to update policies pushed from task to thread */
829 new_thread->requested_policy.bg_iotier = parent_task->effective_policy.bg_iotier;
830 new_thread->requested_policy.terminated = parent_task->effective_policy.terminated;
831
832 /* Set the thread's scheduling parameters */
833 new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
834 new_thread->sched_flags = 0;
835 new_thread->max_priority = parent_task->max_priority;
836 new_thread->task_priority = parent_task->priority;
/* A negative priority argument selects the task's priority; the result is capped at max_priority. */
837 new_thread->priority = (priority < 0)? parent_task->priority: priority;
838 if (new_thread->priority > new_thread->max_priority)
839 new_thread->priority = new_thread->max_priority;
/* importance is the thread's priority relative to its task's priority. */
840 new_thread->importance = new_thread->priority - new_thread->task_priority;
841 new_thread->saved_importance = new_thread->importance;
842
843 #if defined(CONFIG_SCHED_TRADITIONAL)
844 new_thread->sched_stamp = sched_tick;
845 new_thread->pri_shift = sched_pri_shift;
846 #endif
847 SCHED(compute_priority)(new_thread, FALSE);
848
849 new_thread->active = TRUE;
850
851 *out_thread = new_thread;
852
/* Emit kdebug trace records that associate the new thread with its task's bsd_info. */
853 {
854 long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;
855
856 kdbg_trace_data(parent_task->bsd_info, &dbg_arg2);
857
858 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
859 TRACEDBG_CODE(DBG_TRACE_DATA, 1) | DBG_FUNC_NONE,
860 (vm_address_t)(uintptr_t)thread_tid(new_thread), dbg_arg2, 0, 0, 0);
861
862 kdbg_trace_string(parent_task->bsd_info,
863 &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
864
865 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
866 TRACEDBG_CODE(DBG_TRACE_STRING, 1) | DBG_FUNC_NONE,
867 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
868 }
869
870 DTRACE_PROC1(lwp__create, thread_t, *out_thread);
871
/* Success: tasks_threads_lock and the task lock are intentionally left held for the caller. */
872 return (KERN_SUCCESS);
873 }
874
875 static kern_return_t
876 thread_create_internal2(
877 task_t task,
878 thread_t *new_thread,
879 boolean_t from_user)
880 {
881 kern_return_t result;
882 thread_t thread;
883
884 if (task == TASK_NULL || task == kernel_task)
885 return (KERN_INVALID_ARGUMENT);
886
887 result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, TH_OPTION_NONE, &thread);
888 if (result != KERN_SUCCESS)
889 return (result);
890
891 thread->user_stop_count = 1;
892 thread_hold(thread);
893 if (task->suspend_count > 0)
894 thread_hold(thread);
895
896 if (from_user)
897 extmod_statistics_incr_thread_create(task);
898
899 task_unlock(task);
900 lck_mtx_unlock(&tasks_threads_lock);
901
902 *new_thread = thread;
903
904 return (KERN_SUCCESS);
905 }
906
907 /* No prototype, since task_server.h has the _from_user version if KERNEL_SERVER */
908 kern_return_t
909 thread_create(
910 task_t task,
911 thread_t *new_thread);
912
913 kern_return_t
914 thread_create(
915 task_t task,
916 thread_t *new_thread)
917 {
918 return thread_create_internal2(task, new_thread, FALSE);
919 }
920
921 kern_return_t
922 thread_create_from_user(
923 task_t task,
924 thread_t *new_thread)
925 {
926 return thread_create_internal2(task, new_thread, TRUE);
927 }
928
929 static kern_return_t
930 thread_create_running_internal2(
931 register task_t task,
932 int flavor,
933 thread_state_t new_state,
934 mach_msg_type_number_t new_state_count,
935 thread_t *new_thread,
936 boolean_t from_user)
937 {
938 register kern_return_t result;
939 thread_t thread;
940
941 if (task == TASK_NULL || task == kernel_task)
942 return (KERN_INVALID_ARGUMENT);
943
944 result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, TH_OPTION_NONE, &thread);
945 if (result != KERN_SUCCESS)
946 return (result);
947
948 result = machine_thread_set_state(
949 thread, flavor, new_state, new_state_count);
950 if (result != KERN_SUCCESS) {
951 task_unlock(task);
952 lck_mtx_unlock(&tasks_threads_lock);
953
954 thread_terminate(thread);
955 thread_deallocate(thread);
956 return (result);
957 }
958
959 thread_mtx_lock(thread);
960 thread_start_internal(thread);
961 thread_mtx_unlock(thread);
962
963 if (from_user)
964 extmod_statistics_incr_thread_create(task);
965
966 task_unlock(task);
967 lck_mtx_unlock(&tasks_threads_lock);
968
969 *new_thread = thread;
970
971 return (result);
972 }
973
974 /* Prototype, see justification above */
975 kern_return_t
976 thread_create_running(
977 register task_t task,
978 int flavor,
979 thread_state_t new_state,
980 mach_msg_type_number_t new_state_count,
981 thread_t *new_thread);
982
983 kern_return_t
984 thread_create_running(
985 register task_t task,
986 int flavor,
987 thread_state_t new_state,
988 mach_msg_type_number_t new_state_count,
989 thread_t *new_thread)
990 {
991 return thread_create_running_internal2(
992 task, flavor, new_state, new_state_count,
993 new_thread, FALSE);
994 }
995
996 kern_return_t
997 thread_create_running_from_user(
998 register task_t task,
999 int flavor,
1000 thread_state_t new_state,
1001 mach_msg_type_number_t new_state_count,
1002 thread_t *new_thread)
1003 {
1004 return thread_create_running_internal2(
1005 task, flavor, new_state, new_state_count,
1006 new_thread, TRUE);
1007 }
1008
1009 kern_return_t
1010 thread_create_workq(
1011 task_t task,
1012 thread_continue_t thread_return,
1013 thread_t *new_thread)
1014 {
1015 kern_return_t result;
1016 thread_t thread;
1017
1018 if (task == TASK_NULL || task == kernel_task)
1019 return (KERN_INVALID_ARGUMENT);
1020
1021 result = thread_create_internal(task, -1, thread_return, TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread);
1022 if (result != KERN_SUCCESS)
1023 return (result);
1024
1025 thread->user_stop_count = 1;
1026 thread_hold(thread);
1027 if (task->suspend_count > 0)
1028 thread_hold(thread);
1029
1030 task_unlock(task);
1031 lck_mtx_unlock(&tasks_threads_lock);
1032
1033 *new_thread = thread;
1034
1035 return (KERN_SUCCESS);
1036 }
1037
1038 /*
1039 * kernel_thread_create:
1040 *
1041 * Create a thread in the kernel task
1042 * to execute in kernel context.
1043 */
1044 kern_return_t
1045 kernel_thread_create(
1046 thread_continue_t continuation,
1047 void *parameter,
1048 integer_t priority,
1049 thread_t *new_thread)
1050 {
1051 kern_return_t result;
1052 thread_t thread;
1053 task_t task = kernel_task;
1054
1055 result = thread_create_internal(task, priority, continuation, TH_OPTION_NONE, &thread);
1056 if (result != KERN_SUCCESS)
1057 return (result);
1058
1059 task_unlock(task);
1060 lck_mtx_unlock(&tasks_threads_lock);
1061
1062 stack_alloc(thread);
1063 assert(thread->kernel_stack != 0);
1064 thread->reserved_stack = thread->kernel_stack;
1065
1066 thread->parameter = parameter;
1067
1068 if(debug_task & 1)
1069 kprintf("kernel_thread_create: thread = %p continuation = %p\n", thread, continuation);
1070 *new_thread = thread;
1071
1072 return (result);
1073 }
1074
1075 kern_return_t
1076 kernel_thread_start_priority(
1077 thread_continue_t continuation,
1078 void *parameter,
1079 integer_t priority,
1080 thread_t *new_thread)
1081 {
1082 kern_return_t result;
1083 thread_t thread;
1084
1085 result = kernel_thread_create(continuation, parameter, priority, &thread);
1086 if (result != KERN_SUCCESS)
1087 return (result);
1088
1089 *new_thread = thread;
1090
1091 thread_mtx_lock(thread);
1092 thread_start_internal(thread);
1093 thread_mtx_unlock(thread);
1094
1095 return (result);
1096 }
1097
1098 kern_return_t
1099 kernel_thread_start(
1100 thread_continue_t continuation,
1101 void *parameter,
1102 thread_t *new_thread)
1103 {
1104 return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
1105 }
1106
1107
1108 kern_return_t
1109 thread_info_internal(
1110 register thread_t thread,
1111 thread_flavor_t flavor,
1112 thread_info_t thread_info_out, /* ptr to OUT array */
1113 mach_msg_type_number_t *thread_info_count) /*IN/OUT*/
1114 {
1115 int state, flags;
1116 spl_t s;
1117
1118 if (thread == THREAD_NULL)
1119 return (KERN_INVALID_ARGUMENT);
1120
1121 if (flavor == THREAD_BASIC_INFO) {
1122 register thread_basic_info_t basic_info;
1123
1124 if (*thread_info_count < THREAD_BASIC_INFO_COUNT)
1125 return (KERN_INVALID_ARGUMENT);
1126
1127 basic_info = (thread_basic_info_t) thread_info_out;
1128
1129 s = splsched();
1130 thread_lock(thread);
1131
1132 /* fill in info */
1133
1134 thread_read_times(thread, &basic_info->user_time,
1135 &basic_info->system_time);
1136
1137 /*
1138 * Update lazy-evaluated scheduler info because someone wants it.
1139 */
1140 if (SCHED(can_update_priority)(thread))
1141 SCHED(update_priority)(thread);
1142
1143 basic_info->sleep_time = 0;
1144
1145 /*
1146 * To calculate cpu_usage, first correct for timer rate,
1147 * then for 5/8 ageing. The correction factor [3/5] is
1148 * (1/(5/8) - 1).
1149 */
1150 basic_info->cpu_usage = 0;
1151 #if defined(CONFIG_SCHED_TRADITIONAL)
1152 if (sched_tick_interval) {
1153 basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
1154 * TH_USAGE_SCALE) / sched_tick_interval);
1155 basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
1156 }
1157 #endif
1158
1159 if (basic_info->cpu_usage > TH_USAGE_SCALE)
1160 basic_info->cpu_usage = TH_USAGE_SCALE;
1161
1162 basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
1163 POLICY_TIMESHARE: POLICY_RR);
1164
1165 flags = 0;
1166 if (thread->options & TH_OPT_IDLE_THREAD)
1167 flags |= TH_FLAGS_IDLE;
1168
1169 if (!thread->kernel_stack)
1170 flags |= TH_FLAGS_SWAPPED;
1171
1172 state = 0;
1173 if (thread->state & TH_TERMINATE)
1174 state = TH_STATE_HALTED;
1175 else
1176 if (thread->state & TH_RUN)
1177 state = TH_STATE_RUNNING;
1178 else
1179 if (thread->state & TH_UNINT)
1180 state = TH_STATE_UNINTERRUPTIBLE;
1181 else
1182 if (thread->state & TH_SUSP)
1183 state = TH_STATE_STOPPED;
1184 else
1185 if (thread->state & TH_WAIT)
1186 state = TH_STATE_WAITING;
1187
1188 basic_info->run_state = state;
1189 basic_info->flags = flags;
1190
1191 basic_info->suspend_count = thread->user_stop_count;
1192
1193 thread_unlock(thread);
1194 splx(s);
1195
1196 *thread_info_count = THREAD_BASIC_INFO_COUNT;
1197
1198 return (KERN_SUCCESS);
1199 }
1200 else
1201 if (flavor == THREAD_IDENTIFIER_INFO) {
1202 register thread_identifier_info_t identifier_info;
1203
1204 if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT)
1205 return (KERN_INVALID_ARGUMENT);
1206
1207 identifier_info = (thread_identifier_info_t) thread_info_out;
1208
1209 s = splsched();
1210 thread_lock(thread);
1211
1212 identifier_info->thread_id = thread->thread_id;
1213 identifier_info->thread_handle = thread->machine.cthread_self;
1214 if(thread->task->bsd_info) {
1215 identifier_info->dispatch_qaddr = identifier_info->thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
1216 } else {
1217 thread_unlock(thread);
1218 splx(s);
1219 return KERN_INVALID_ARGUMENT;
1220 }
1221
1222 thread_unlock(thread);
1223 splx(s);
1224 return KERN_SUCCESS;
1225 }
1226 else
1227 if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
1228 policy_timeshare_info_t ts_info;
1229
1230 if (*thread_info_count < POLICY_TIMESHARE_INFO_COUNT)
1231 return (KERN_INVALID_ARGUMENT);
1232
1233 ts_info = (policy_timeshare_info_t)thread_info_out;
1234
1235 s = splsched();
1236 thread_lock(thread);
1237
1238 if (thread->sched_mode != TH_MODE_TIMESHARE) {
1239 thread_unlock(thread);
1240 splx(s);
1241
1242 return (KERN_INVALID_POLICY);
1243 }
1244
1245 ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
1246 if (ts_info->depressed) {
1247 ts_info->base_priority = DEPRESSPRI;
1248 ts_info->depress_priority = thread->priority;
1249 }
1250 else {
1251 ts_info->base_priority = thread->priority;
1252 ts_info->depress_priority = -1;
1253 }
1254
1255 ts_info->cur_priority = thread->sched_pri;
1256 ts_info->max_priority = thread->max_priority;
1257
1258 thread_unlock(thread);
1259 splx(s);
1260
1261 *thread_info_count = POLICY_TIMESHARE_INFO_COUNT;
1262
1263 return (KERN_SUCCESS);
1264 }
1265 else
1266 if (flavor == THREAD_SCHED_FIFO_INFO) {
1267 if (*thread_info_count < POLICY_FIFO_INFO_COUNT)
1268 return (KERN_INVALID_ARGUMENT);
1269
1270 return (KERN_INVALID_POLICY);
1271 }
1272 else
1273 if (flavor == THREAD_SCHED_RR_INFO) {
1274 policy_rr_info_t rr_info;
1275 uint32_t quantum_time;
1276 uint64_t quantum_ns;
1277
1278 if (*thread_info_count < POLICY_RR_INFO_COUNT)
1279 return (KERN_INVALID_ARGUMENT);
1280
1281 rr_info = (policy_rr_info_t) thread_info_out;
1282
1283 s = splsched();
1284 thread_lock(thread);
1285
1286 if (thread->sched_mode == TH_MODE_TIMESHARE) {
1287 thread_unlock(thread);
1288 splx(s);
1289
1290 return (KERN_INVALID_POLICY);
1291 }
1292
1293 rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
1294 if (rr_info->depressed) {
1295 rr_info->base_priority = DEPRESSPRI;
1296 rr_info->depress_priority = thread->priority;
1297 }
1298 else {
1299 rr_info->base_priority = thread->priority;
1300 rr_info->depress_priority = -1;
1301 }
1302
1303 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
1304 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
1305
1306 rr_info->max_priority = thread->max_priority;
1307 rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
1308
1309 thread_unlock(thread);
1310 splx(s);
1311
1312 *thread_info_count = POLICY_RR_INFO_COUNT;
1313
1314 return (KERN_SUCCESS);
1315 }
1316
1317 return (KERN_INVALID_ARGUMENT);
1318 }
1319
1320 void
1321 thread_read_times(
1322 thread_t thread,
1323 time_value_t *user_time,
1324 time_value_t *system_time)
1325 {
1326 clock_sec_t secs;
1327 clock_usec_t usecs;
1328 uint64_t tval_user, tval_system;
1329
1330 tval_user = timer_grab(&thread->user_timer);
1331 tval_system = timer_grab(&thread->system_timer);
1332
1333 if (thread->precise_user_kernel_time) {
1334 absolutetime_to_microtime(tval_user, &secs, &usecs);
1335 user_time->seconds = (typeof(user_time->seconds))secs;
1336 user_time->microseconds = usecs;
1337
1338 absolutetime_to_microtime(tval_system, &secs, &usecs);
1339 system_time->seconds = (typeof(system_time->seconds))secs;
1340 system_time->microseconds = usecs;
1341 } else {
1342 /* system_timer may represent either sys or user */
1343 tval_user += tval_system;
1344 absolutetime_to_microtime(tval_user, &secs, &usecs);
1345 user_time->seconds = (typeof(user_time->seconds))secs;
1346 user_time->microseconds = usecs;
1347
1348 system_time->seconds = 0;
1349 system_time->microseconds = 0;
1350 }
1351 }
1352
1353 kern_return_t
1354 thread_assign(
1355 __unused thread_t thread,
1356 __unused processor_set_t new_pset)
1357 {
1358 return (KERN_FAILURE);
1359 }
1360
1361 /*
1362 * thread_assign_default:
1363 *
1364 * Special version of thread_assign for assigning threads to default
1365 * processor set.
1366 */
1367 kern_return_t
1368 thread_assign_default(
1369 thread_t thread)
1370 {
1371 return (thread_assign(thread, &pset0));
1372 }
1373
1374 /*
1375 * thread_get_assignment
1376 *
1377 * Return current assignment for this thread.
1378 */
1379 kern_return_t
1380 thread_get_assignment(
1381 thread_t thread,
1382 processor_set_t *pset)
1383 {
1384 if (thread == NULL)
1385 return (KERN_INVALID_ARGUMENT);
1386
1387 *pset = &pset0;
1388
1389 return (KERN_SUCCESS);
1390 }
1391
1392 /*
1393 * thread_wire_internal:
1394 *
1395 * Specify that the target thread must always be able
1396 * to run and to allocate memory.
1397 */
1398 kern_return_t
1399 thread_wire_internal(
1400 host_priv_t host_priv,
1401 thread_t thread,
1402 boolean_t wired,
1403 boolean_t *prev_state)
1404 {
1405 if (host_priv == NULL || thread != current_thread())
1406 return (KERN_INVALID_ARGUMENT);
1407
1408 assert(host_priv == &realhost);
1409
1410 if (prev_state)
1411 *prev_state = (thread->options & TH_OPT_VMPRIV) != 0;
1412
1413 if (wired) {
1414 if (!(thread->options & TH_OPT_VMPRIV))
1415 vm_page_free_reserve(1); /* XXX */
1416 thread->options |= TH_OPT_VMPRIV;
1417 }
1418 else {
1419 if (thread->options & TH_OPT_VMPRIV)
1420 vm_page_free_reserve(-1); /* XXX */
1421 thread->options &= ~TH_OPT_VMPRIV;
1422 }
1423
1424 return (KERN_SUCCESS);
1425 }
1426
1427
1428 /*
1429 * thread_wire:
1430 *
1431 * User-api wrapper for thread_wire_internal()
1432 */
1433 kern_return_t
1434 thread_wire(
1435 host_priv_t host_priv,
1436 thread_t thread,
1437 boolean_t wired)
1438 {
1439 return (thread_wire_internal(host_priv, thread, wired, NULL));
1440 }
1441
1442
1443 /*
1444 * XXX assuming current thread only, for now...
1445 */
1446 void
1447 thread_guard_violation(thread_t thread, unsigned type)
1448 {
1449 assert(thread == current_thread());
1450
1451 spl_t s = splsched();
1452 /*
1453 * Use the saved state area of the thread structure
1454 * to store all info required to handle the AST when
1455 * returning to userspace
1456 */
1457 thread->guard_exc_info.type = type;
1458 thread_ast_set(thread, AST_GUARD);
1459 ast_propagate(thread->ast);
1460
1461 splx(s);
1462 }
1463
1464 /*
1465 * guard_ast:
1466 *
1467 * Handle AST_GUARD for a thread. This routine looks at the
1468 * state saved in the thread structure to determine the cause
1469 * of this exception. Based on this value, it invokes the
1470 * appropriate routine which determines other exception related
1471 * info and raises the exception.
1472 */
1473 void
1474 guard_ast(thread_t thread)
1475 {
1476 if (thread->guard_exc_info.type == GUARD_TYPE_MACH_PORT)
1477 mach_port_guard_ast(thread);
1478 else
1479 fd_guard_ast(thread);
1480 }
1481
1482 static void
1483 thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1)
1484 {
1485 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
1486 #if CONFIG_TELEMETRY
1487 /*
1488 * This thread is in danger of violating the CPU usage monitor. Enable telemetry
1489 * on the entire task so there are micro-stackshots available if and when
1490 * EXC_RESOURCE is triggered. We could have chosen to enable micro-stackshots
1491 * for this thread only; but now that this task is suspect, knowing what all of
1492 * its threads are up to will be useful.
1493 */
1494 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1);
1495 #endif
1496 return;
1497 }
1498
1499 #if CONFIG_TELEMETRY
1500 /*
1501 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
1502 * exceeded the limit, turn telemetry off for the task.
1503 */
1504 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0);
1505 #endif
1506
1507 if (warning == 0) {
1508 THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE();
1509 }
1510 }
1511
1512 void __attribute__((noinline))
1513 THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(void)
1514 {
1515 int pid = 0;
1516 task_t task = current_task();
1517 thread_t thread = current_thread();
1518 uint64_t tid = thread->thread_id;
1519 char *procname = (char *) "unknown";
1520 time_value_t thread_total_time = {0, 0};
1521 time_value_t thread_system_time;
1522 time_value_t thread_user_time;
1523 int action;
1524 uint8_t percentage;
1525 uint32_t limit_percent;
1526 uint32_t usage_percent;
1527 uint32_t interval_sec;
1528 uint64_t interval_ns;
1529 uint64_t balance_ns;
1530 boolean_t fatal = FALSE;
1531
1532 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1533 struct ledger_entry_info lei;
1534
1535 assert(thread->t_threadledger != LEDGER_NULL);
1536
1537 /*
1538 * Now that a thread has tripped the monitor, disable it for the entire task.
1539 */
1540 task_lock(task);
1541 if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) {
1542 /*
1543 * The CPU usage monitor has been disabled on our task, so some other
1544 * thread must have gotten here first. We only send one exception per
1545 * task lifetime, so there's nothing left for us to do here.
1546 */
1547 task_unlock(task);
1548 return;
1549 }
1550 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
1551 fatal = TRUE;
1552 }
1553 task_disable_cpumon(task);
1554 task_unlock(task);
1555
1556 #ifdef MACH_BSD
1557 pid = proc_selfpid();
1558 if (task->bsd_info != NULL)
1559 procname = proc_name_address(task->bsd_info);
1560 #endif
1561
1562 thread_get_cpulimit(&action, &percentage, &interval_ns);
1563
1564 interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);
1565
1566 thread_read_times(thread, &thread_user_time, &thread_system_time);
1567 time_value_add(&thread_total_time, &thread_user_time);
1568 time_value_add(&thread_total_time, &thread_system_time);
1569
1570 ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);
1571
1572 absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
1573 usage_percent = (uint32_t) ((balance_ns * 100ULL) / lei.lei_last_refill);
1574
1575 /* Show refill period in the same units as balance, limit, etc */
1576 nanoseconds_to_absolutetime(lei.lei_refill_period, &lei.lei_refill_period);
1577
1578 limit_percent = (uint32_t) ((lei.lei_limit * 100ULL) / lei.lei_refill_period);
1579
1580 /* TODO: show task total runtime as well? see TASK_ABSOLUTETIME_INFO */
1581
1582 if (disable_exc_resource) {
1583 printf("process %s[%d] thread %llu caught burning CPU!; EXC_RESOURCE "
1584 "supressed by a boot-arg\n", procname, pid, tid);
1585 return;
1586 }
1587
1588 if (audio_active) {
1589 printf("process %s[%d] thread %llu caught burning CPU!; EXC_RESOURCE "
1590 "supressed due to audio playback\n", procname, pid, tid);
1591 return;
1592 }
1593 printf("process %s[%d] thread %llu caught burning CPU! "
1594 "It used more than %d%% CPU (Actual recent usage: %d%%) over %d seconds. "
1595 "thread lifetime cpu usage %d.%06d seconds, (%d.%06d user, %d.%06d system) "
1596 "ledger info: balance: %lld credit: %lld debit: %lld limit: %llu (%d%%) "
1597 "period: %llu time since last refill (ns): %llu \n",
1598 procname, pid, tid,
1599 percentage, usage_percent, interval_sec,
1600 thread_total_time.seconds, thread_total_time.microseconds,
1601 thread_user_time.seconds, thread_user_time.microseconds,
1602 thread_system_time.seconds, thread_system_time.microseconds,
1603 lei.lei_balance,
1604 lei.lei_credit, lei.lei_debit,
1605 lei.lei_limit, limit_percent,
1606 lei.lei_refill_period, lei.lei_last_refill);
1607
1608
1609 code[0] = code[1] = 0;
1610 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU);
1611 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR);
1612 EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec);
1613 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], limit_percent);
1614 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent);
1615 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
1616
1617 if (fatal) {
1618 task_terminate_internal(task);
1619 }
1620 }
1621
1622 void
1623 init_thread_ledgers(void) {
1624 ledger_template_t t;
1625 int idx;
1626
1627 assert(thread_ledger_template == NULL);
1628
1629 if ((t = ledger_template_create("Per-thread ledger")) == NULL)
1630 panic("couldn't create thread ledger template");
1631
1632 if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
1633 panic("couldn't create cpu_time entry for thread ledger template");
1634 }
1635
1636 if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) {
1637 panic("couldn't set thread ledger callback for cpu_time entry");
1638 }
1639
1640 thread_ledgers.cpu_time = idx;
1641 thread_ledger_template = t;
1642 }
1643
1644 /*
1645 * Returns currently applied CPU usage limit, or 0/0 if none is applied.
1646 */
1647 int
1648 thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns)
1649 {
1650 int64_t abstime = 0;
1651 uint64_t limittime = 0;
1652 thread_t thread = current_thread();
1653
1654 *percentage = 0;
1655 *interval_ns = 0;
1656 *action = 0;
1657
1658 if (thread->t_threadledger == LEDGER_NULL) {
1659 /*
1660 * This thread has no per-thread ledger, so it can't possibly
1661 * have a CPU limit applied.
1662 */
1663 return (KERN_SUCCESS);
1664 }
1665
1666 ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns);
1667 ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime);
1668
1669 if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) {
1670 /*
1671 * This thread's CPU time ledger has no period or limit; so it
1672 * doesn't have a CPU limit applied.
1673 */
1674 return (KERN_SUCCESS);
1675 }
1676
1677 /*
1678 * This calculation is the converse to the one in thread_set_cpulimit().
1679 */
1680 absolutetime_to_nanoseconds(abstime, &limittime);
1681 *percentage = (limittime * 100ULL) / *interval_ns;
1682 assert(*percentage <= 100);
1683
1684 if (thread->options & TH_OPT_PROC_CPULIMIT) {
1685 assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0);
1686
1687 *action = THREAD_CPULIMIT_BLOCK;
1688 } else if (thread->options & TH_OPT_PRVT_CPULIMIT) {
1689 assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0);
1690
1691 *action = THREAD_CPULIMIT_EXCEPTION;
1692 } else {
1693 *action = THREAD_CPULIMIT_DISABLE;
1694 }
1695
1696 return (KERN_SUCCESS);
1697 }
1698
1699 /*
1700 * Set CPU usage limit on a thread.
1701 *
1702 * Calling with percentage of 0 will unset the limit for this thread.
1703 */
1704 int
1705 thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
1706 {
1707 thread_t thread = current_thread();
1708 ledger_t l;
1709 uint64_t limittime = 0;
1710 uint64_t abstime = 0;
1711
1712 assert(percentage <= 100);
1713
1714 if (action == THREAD_CPULIMIT_DISABLE) {
1715 /*
1716 * Remove CPU limit, if any exists.
1717 */
1718 if (thread->t_threadledger != LEDGER_NULL) {
1719 l = thread->t_threadledger;
1720 /*
1721 * The only way to get a per-thread ledger is via CPU limits.
1722 */
1723 assert(thread->options & (TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT));
1724 thread->t_threadledger = NULL;
1725 ledger_dereference(l);
1726 thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT);
1727 }
1728
1729 return (0);
1730 }
1731
1732 if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) {
1733 return (KERN_INVALID_ARGUMENT);
1734 }
1735
1736 l = thread->t_threadledger;
1737 if (l == LEDGER_NULL) {
1738 /*
1739 * This thread doesn't yet have a per-thread ledger; so create one with the CPU time entry active.
1740 */
1741 if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL)
1742 return (KERN_RESOURCE_SHORTAGE);
1743
1744 /*
1745 * We are the first to create this thread's ledger, so only activate our entry.
1746 */
1747 ledger_entry_setactive(l, thread_ledgers.cpu_time);
1748 thread->t_threadledger = l;
1749 }
1750
1751 /*
1752 * The limit is specified as a percentage of CPU over an interval in nanoseconds.
1753 * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
1754 */
1755 limittime = (interval_ns * percentage) / 100;
1756 nanoseconds_to_absolutetime(limittime, &abstime);
1757 ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct);
1758 /*
1759 * Refill the thread's allotted CPU time every interval_ns nanoseconds.
1760 */
1761 ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);
1762
1763 if (action == THREAD_CPULIMIT_EXCEPTION) {
1764 /*
1765 * We don't support programming the CPU usage monitor on a task if any of its
1766 * threads have a per-thread blocking CPU limit configured.
1767 */
1768 if (thread->options & TH_OPT_PRVT_CPULIMIT) {
1769 panic("CPU usage monitor activated, but blocking thread limit exists");
1770 }
1771
1772 /*
1773 * Make a note that this thread's CPU limit is being used for the task-wide CPU
1774 * usage monitor. We don't have to arm the callback which will trigger the
1775 * exception, because that was done for us in ledger_instantiate (because the
1776 * ledger template used has a default callback).
1777 */
1778 thread->options |= TH_OPT_PROC_CPULIMIT;
1779 } else {
1780 /*
1781 * We deliberately override any CPU limit imposed by a task-wide limit (eg
1782 * CPU usage monitor).
1783 */
1784 thread->options &= ~TH_OPT_PROC_CPULIMIT;
1785
1786 thread->options |= TH_OPT_PRVT_CPULIMIT;
1787 /* The per-thread ledger template by default has a callback for CPU time */
1788 ledger_disable_callback(l, thread_ledgers.cpu_time);
1789 ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
1790 }
1791
1792 return (0);
1793 }
1794
1795 int split_funnel_off = 0;
1796 lck_grp_t *funnel_lck_grp = LCK_GRP_NULL;
1797 lck_grp_attr_t *funnel_lck_grp_attr;
1798 lck_attr_t *funnel_lck_attr;
1799
1800 funnel_t *
1801 funnel_alloc(
1802 int type)
1803 {
1804 lck_mtx_t *m;
1805 funnel_t *fnl;
1806
1807 if (funnel_lck_grp == LCK_GRP_NULL) {
1808 funnel_lck_grp_attr = lck_grp_attr_alloc_init();
1809
1810 funnel_lck_grp = lck_grp_alloc_init("Funnel", funnel_lck_grp_attr);
1811
1812 funnel_lck_attr = lck_attr_alloc_init();
1813 }
1814 if ((fnl = (funnel_t *)kalloc(sizeof(funnel_t))) != 0){
1815 bzero((void *)fnl, sizeof(funnel_t));
1816 if ((m = lck_mtx_alloc_init(funnel_lck_grp, funnel_lck_attr)) == (lck_mtx_t *)NULL) {
1817 kfree(fnl, sizeof(funnel_t));
1818 return(THR_FUNNEL_NULL);
1819 }
1820 fnl->fnl_mutex = m;
1821 fnl->fnl_type = type;
1822 }
1823 return(fnl);
1824 }
1825
1826 void
1827 funnel_free(
1828 funnel_t * fnl)
1829 {
1830 lck_mtx_free(fnl->fnl_mutex, funnel_lck_grp);
1831 if (fnl->fnl_oldmutex)
1832 lck_mtx_free(fnl->fnl_oldmutex, funnel_lck_grp);
1833 kfree(fnl, sizeof(funnel_t));
1834 }
1835
1836 void
1837 funnel_lock(
1838 funnel_t * fnl)
1839 {
1840 lck_mtx_lock(fnl->fnl_mutex);
1841 fnl->fnl_mtxholder = current_thread();
1842 }
1843
1844 void
1845 funnel_unlock(
1846 funnel_t * fnl)
1847 {
1848 lck_mtx_unlock(fnl->fnl_mutex);
1849 fnl->fnl_mtxholder = NULL;
1850 fnl->fnl_mtxrelease = current_thread();
1851 }
1852
1853 funnel_t *
1854 thread_funnel_get(
1855 void)
1856 {
1857 thread_t th = current_thread();
1858
1859 if (th->funnel_state & TH_FN_OWNED) {
1860 return(th->funnel_lock);
1861 }
1862 return(THR_FUNNEL_NULL);
1863 }
1864
1865 boolean_t
1866 thread_funnel_set(
1867 funnel_t * fnl,
1868 boolean_t funneled)
1869 {
1870 thread_t cur_thread;
1871 boolean_t funnel_state_prev;
1872 boolean_t intr;
1873
1874 cur_thread = current_thread();
1875 funnel_state_prev = ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED);
1876
1877 if (funnel_state_prev != funneled) {
1878 intr = ml_set_interrupts_enabled(FALSE);
1879
1880 if (funneled == TRUE) {
1881 if (cur_thread->funnel_lock)
1882 panic("Funnel lock called when holding one %p", cur_thread->funnel_lock);
1883 KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE,
1884 fnl, 1, 0, 0, 0);
1885 funnel_lock(fnl);
1886 KERNEL_DEBUG(0x6032434 | DBG_FUNC_NONE,
1887 fnl, 1, 0, 0, 0);
1888 cur_thread->funnel_state |= TH_FN_OWNED;
1889 cur_thread->funnel_lock = fnl;
1890 } else {
1891 if(cur_thread->funnel_lock->fnl_mutex != fnl->fnl_mutex)
1892 panic("Funnel unlock when not holding funnel");
1893 cur_thread->funnel_state &= ~TH_FN_OWNED;
1894 KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE,
1895 fnl, 1, 0, 0, 0);
1896
1897 cur_thread->funnel_lock = THR_FUNNEL_NULL;
1898 funnel_unlock(fnl);
1899 }
1900 (void)ml_set_interrupts_enabled(intr);
1901 } else {
1902 /* if we are trying to acquire funnel recursively
1903 * check for funnel to be held already
1904 */
1905 if (funneled && (fnl->fnl_mutex != cur_thread->funnel_lock->fnl_mutex)) {
1906 panic("thread_funnel_set: already holding a different funnel");
1907 }
1908 }
1909 return(funnel_state_prev);
1910 }
1911
1912 static void
1913 sched_call_null(
1914 __unused int type,
1915 __unused thread_t thread)
1916 {
1917 return;
1918 }
1919
1920 void
1921 thread_sched_call(
1922 thread_t thread,
1923 sched_call_t call)
1924 {
1925 thread->sched_call = (call != NULL)? call: sched_call_null;
1926 }
1927
1928 void
1929 thread_static_param(
1930 thread_t thread,
1931 boolean_t state)
1932 {
1933 thread_mtx_lock(thread);
1934 thread->static_param = state;
1935 thread_mtx_unlock(thread);
1936 }
1937
1938 uint64_t
1939 thread_tid(
1940 thread_t thread)
1941 {
1942 return (thread != THREAD_NULL? thread->thread_id: 0);
1943 }
1944
1945 uint16_t thread_set_tag(thread_t th, uint16_t tag) {
1946 return thread_set_tag_internal(th, tag);
1947 }
1948 uint16_t thread_get_tag(thread_t th) {
1949 return thread_get_tag_internal(th);
1950 }
1951
1952 uint64_t
1953 thread_dispatchqaddr(
1954 thread_t thread)
1955 {
1956 uint64_t dispatchqueue_addr = 0;
1957 uint64_t thread_handle = 0;
1958
1959 if (thread != THREAD_NULL) {
1960 thread_handle = thread->machine.cthread_self;
1961
1962 if (thread->task->bsd_info)
1963 dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
1964 }
1965
1966 return (dispatchqueue_addr);
1967 }
1968
1969 /*
1970 * Export routines to other components for things that are done as macros
1971 * within the osfmk component.
1972 */
1973
1974 #undef thread_reference
1975 void thread_reference(thread_t thread);
1976 void
1977 thread_reference(
1978 thread_t thread)
1979 {
1980 if (thread != THREAD_NULL)
1981 thread_reference_internal(thread);
1982 }
1983
1984 #undef thread_should_halt
1985
1986 boolean_t
1987 thread_should_halt(
1988 thread_t th)
1989 {
1990 return (thread_should_halt_fast(th));
1991 }
1992
1993 #if CONFIG_DTRACE
1994 uint32_t dtrace_get_thread_predcache(thread_t thread)
1995 {
1996 if (thread != THREAD_NULL)
1997 return thread->t_dtrace_predcache;
1998 else
1999 return 0;
2000 }
2001
2002 int64_t dtrace_get_thread_vtime(thread_t thread)
2003 {
2004 if (thread != THREAD_NULL)
2005 return thread->t_dtrace_vtime;
2006 else
2007 return 0;
2008 }
2009
2010 int64_t dtrace_get_thread_tracing(thread_t thread)
2011 {
2012 if (thread != THREAD_NULL)
2013 return thread->t_dtrace_tracing;
2014 else
2015 return 0;
2016 }
2017
2018 boolean_t dtrace_get_thread_reentering(thread_t thread)
2019 {
2020 if (thread != THREAD_NULL)
2021 return (thread->options & TH_OPT_DTRACE) ? TRUE : FALSE;
2022 else
2023 return 0;
2024 }
2025
2026 vm_offset_t dtrace_get_kernel_stack(thread_t thread)
2027 {
2028 if (thread != THREAD_NULL)
2029 return thread->kernel_stack;
2030 else
2031 return 0;
2032 }
2033
2034 int64_t dtrace_calc_thread_recent_vtime(thread_t thread)
2035 {
2036 if (thread != THREAD_NULL) {
2037 processor_t processor = current_processor();
2038 uint64_t abstime = mach_absolute_time();
2039 timer_t timer;
2040
2041 timer = PROCESSOR_DATA(processor, thread_timer);
2042
2043 return timer_grab(&(thread->system_timer)) + timer_grab(&(thread->user_timer)) +
2044 (abstime - timer->tstamp); /* XXX need interrupts off to prevent missed time? */
2045 } else
2046 return 0;
2047 }
2048
2049 void dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
2050 {
2051 if (thread != THREAD_NULL)
2052 thread->t_dtrace_predcache = predcache;
2053 }
2054
2055 void dtrace_set_thread_vtime(thread_t thread, int64_t vtime)
2056 {
2057 if (thread != THREAD_NULL)
2058 thread->t_dtrace_vtime = vtime;
2059 }
2060
2061 void dtrace_set_thread_tracing(thread_t thread, int64_t accum)
2062 {
2063 if (thread != THREAD_NULL)
2064 thread->t_dtrace_tracing = accum;
2065 }
2066
2067 void dtrace_set_thread_reentering(thread_t thread, boolean_t vbool)
2068 {
2069 if (thread != THREAD_NULL) {
2070 if (vbool)
2071 thread->options |= TH_OPT_DTRACE;
2072 else
2073 thread->options &= (~TH_OPT_DTRACE);
2074 }
2075 }
2076
2077 vm_offset_t dtrace_set_thread_recover(thread_t thread, vm_offset_t recover)
2078 {
2079 vm_offset_t prev = 0;
2080
2081 if (thread != THREAD_NULL) {
2082 prev = thread->recover;
2083 thread->recover = recover;
2084 }
2085 return prev;
2086 }
2087
2088 void dtrace_thread_bootstrap(void)
2089 {
2090 task_t task = current_task();
2091
2092 if (task->thread_count == 1) {
2093 thread_t thread = current_thread();
2094 if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) {
2095 thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS;
2096 DTRACE_PROC(exec__success);
2097 }
2098 DTRACE_PROC(start);
2099 }
2100 DTRACE_PROC(lwp__start);
2101
2102 }
2103
2104 void
2105 dtrace_thread_didexec(thread_t thread)
2106 {
2107 thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS;
2108 }
2109 #endif /* CONFIG_DTRACE */