]> git.saurik.com Git - apple/xnu.git/blob - osfmk/kern/thread.c
xnu-2422.1.72.tar.gz
[apple/xnu.git] / osfmk / kern / thread.c
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: kern/thread.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub
61 * Date: 1986
62 *
63 * Thread management primitives implementation.
64 */
65 /*
66 * Copyright (c) 1993 The University of Utah and
67 * the Computer Systems Laboratory (CSL). All rights reserved.
68 *
69 * Permission to use, copy, modify and distribute this software and its
70 * documentation is hereby granted, provided that both the copyright
71 * notice and this permission notice appear in all copies of the
72 * software, derivative works or modified versions, and any portions
73 * thereof, and that both notices appear in supporting documentation.
74 *
75 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
76 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
77 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
78 *
79 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
80 * improvements that they make and grant CSL redistribution rights.
81 *
82 */
83
84 #include <mach/mach_types.h>
85 #include <mach/boolean.h>
86 #include <mach/policy.h>
87 #include <mach/thread_info.h>
88 #include <mach/thread_special_ports.h>
89 #include <mach/thread_status.h>
90 #include <mach/time_value.h>
91 #include <mach/vm_param.h>
92
93 #include <machine/thread.h>
94 #include <machine/pal_routines.h>
95 #include <machine/limits.h>
96
97 #include <kern/kern_types.h>
98 #include <kern/kalloc.h>
99 #include <kern/cpu_data.h>
100 #include <kern/counters.h>
101 #include <kern/extmod_statistics.h>
102 #include <kern/ipc_mig.h>
103 #include <kern/ipc_tt.h>
104 #include <kern/mach_param.h>
105 #include <kern/machine.h>
106 #include <kern/misc_protos.h>
107 #include <kern/processor.h>
108 #include <kern/queue.h>
109 #include <kern/sched.h>
110 #include <kern/sched_prim.h>
111 #include <kern/sync_lock.h>
112 #include <kern/syscall_subr.h>
113 #include <kern/task.h>
114 #include <kern/thread.h>
115 #include <kern/host.h>
116 #include <kern/zalloc.h>
117 #include <kern/assert.h>
118 #include <kern/exc_resource.h>
119 #include <kern/telemetry.h>
120 #if KPC
121 #include <kern/kpc.h>
122 #endif
123
124 #include <ipc/ipc_kmsg.h>
125 #include <ipc/ipc_port.h>
126
127 #include <vm/vm_kern.h>
128 #include <vm/vm_pageout.h>
129
130 #include <sys/kdebug.h>
131
132 #include <mach/sdt.h>
133
134 /*
135 * Exported interfaces
136 */
137 #include <mach/task_server.h>
138 #include <mach/thread_act_server.h>
139 #include <mach/mach_host_server.h>
140 #include <mach/host_priv_server.h>
141
142 static struct zone *thread_zone;
143 static lck_grp_attr_t thread_lck_grp_attr;
144 lck_attr_t thread_lck_attr;
145 lck_grp_t thread_lck_grp;
146
147 decl_simple_lock_data(static,thread_stack_lock)
148 static queue_head_t thread_stack_queue;
149
150 decl_simple_lock_data(static,thread_terminate_lock)
151 static queue_head_t thread_terminate_queue;
152
153 static struct thread thread_template, init_thread;
154
155 static void sched_call_null(
156 int type,
157 thread_t thread);
158
159 #ifdef MACH_BSD
160 extern void proc_exit(void *);
161 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
162 extern int proc_selfpid(void);
163 extern char * proc_name_address(void *p);
164 #endif /* MACH_BSD */
165
166 extern int disable_exc_resource;
167 extern int debug_task;
168 int thread_max = CONFIG_THREAD_MAX; /* Max number of threads */
169 int task_threadmax = CONFIG_THREAD_MAX;
170
171 static uint64_t thread_unique_id = 0;
172
173 struct _thread_ledger_indices thread_ledgers = { -1 };
174 static ledger_template_t thread_ledger_template = NULL;
175 void init_thread_ledgers(void);
176 int task_disable_cpumon(task_t task);
177
178 /*
179 * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry.
180 *
181 * (ie when any thread's CPU consumption exceeds 70% of the limit, start taking user
182 * stacktraces, aka micro-stackshots)
183 */
184 #define CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70
185
186 int cpumon_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
187 void __attribute__((noinline)) THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(void);
188
189 /*
190 * The smallest interval over which we support limiting CPU consumption is 1ms
191 */
192 #define MINIMUM_CPULIMIT_INTERVAL_MS 1
193
194 void
195 thread_bootstrap(void)
196 {
197 /*
198 * Fill in a template thread for fast initialization.
199 */
200
201 thread_template.runq = PROCESSOR_NULL;
202
203 thread_template.ref_count = 2;
204
205 thread_template.reason = AST_NONE;
206 thread_template.at_safe_point = FALSE;
207 thread_template.wait_event = NO_EVENT64;
208 thread_template.wait_queue = WAIT_QUEUE_NULL;
209 thread_template.wait_result = THREAD_WAITING;
210 thread_template.options = THREAD_ABORTSAFE;
211 thread_template.state = TH_WAIT | TH_UNINT;
212 thread_template.wake_active = FALSE;
213 thread_template.continuation = THREAD_CONTINUE_NULL;
214 thread_template.parameter = NULL;
215
216 thread_template.importance = 0;
217 thread_template.sched_mode = TH_MODE_NONE;
218 thread_template.sched_flags = 0;
219 thread_template.saved_mode = TH_MODE_NONE;
220 thread_template.safe_release = 0;
221
222 thread_template.priority = 0;
223 thread_template.sched_pri = 0;
224 thread_template.max_priority = 0;
225 thread_template.task_priority = 0;
226 thread_template.promotions = 0;
227 thread_template.pending_promoter_index = 0;
228 thread_template.pending_promoter[0] =
229 thread_template.pending_promoter[1] = NULL;
230 thread_template.rwlock_count = 0;
231
232 thread_template.realtime.deadline = UINT64_MAX;
233
234 thread_template.current_quantum = 0;
235 thread_template.last_run_time = 0;
236 thread_template.last_quantum_refill_time = 0;
237
238 thread_template.computation_metered = 0;
239 thread_template.computation_epoch = 0;
240
241 #if defined(CONFIG_SCHED_TRADITIONAL)
242 thread_template.sched_stamp = 0;
243 thread_template.pri_shift = INT8_MAX;
244 thread_template.sched_usage = 0;
245 thread_template.cpu_usage = thread_template.cpu_delta = 0;
246 #endif
247 thread_template.c_switch = thread_template.p_switch = thread_template.ps_switch = 0;
248
249 thread_template.bound_processor = PROCESSOR_NULL;
250 thread_template.last_processor = PROCESSOR_NULL;
251
252 thread_template.sched_call = sched_call_null;
253
254 timer_init(&thread_template.user_timer);
255 timer_init(&thread_template.system_timer);
256 thread_template.user_timer_save = 0;
257 thread_template.system_timer_save = 0;
258 thread_template.vtimer_user_save = 0;
259 thread_template.vtimer_prof_save = 0;
260 thread_template.vtimer_rlim_save = 0;
261
262 thread_template.wait_timer_is_set = FALSE;
263 thread_template.wait_timer_active = 0;
264
265 thread_template.depress_timer_active = 0;
266
267 thread_template.special_handler.handler = special_handler;
268 thread_template.special_handler.next = NULL;
269
270 thread_template.funnel_lock = THR_FUNNEL_NULL;
271 thread_template.funnel_state = 0;
272 thread_template.recover = (vm_offset_t)NULL;
273
274 thread_template.map = VM_MAP_NULL;
275
276 #if CONFIG_DTRACE
277 thread_template.t_dtrace_predcache = 0;
278 thread_template.t_dtrace_vtime = 0;
279 thread_template.t_dtrace_tracing = 0;
280 #endif /* CONFIG_DTRACE */
281
282 #if KPC
283 thread_template.kpc_buf = NULL;
284 #endif
285
286 thread_template.t_chud = 0;
287 thread_template.t_page_creation_count = 0;
288 thread_template.t_page_creation_time = 0;
289
290 thread_template.affinity_set = NULL;
291
292 thread_template.syscalls_unix = 0;
293 thread_template.syscalls_mach = 0;
294
295 thread_template.t_ledger = LEDGER_NULL;
296 thread_template.t_threadledger = LEDGER_NULL;
297
298 thread_template.requested_policy = default_task_requested_policy;
299 thread_template.effective_policy = default_task_effective_policy;
300 thread_template.pended_policy = default_task_pended_policy;
301
302 thread_template.iotier_override = THROTTLE_LEVEL_NONE;
303
304 thread_template.thread_callout_interrupt_wakeups = thread_template.thread_callout_platform_idle_wakeups = 0;
305
306 thread_template.thread_timer_wakeups_bin_1 = thread_template.thread_timer_wakeups_bin_2 = 0;
307 thread_template.callout_woken_from_icontext = thread_template.callout_woken_from_platform_idle = 0;
308
309 thread_template.thread_tag = 0;
310
311 init_thread = thread_template;
312 machine_set_current_thread(&init_thread);
313 }
314
315 void
316 thread_init(void)
317 {
318 thread_zone = zinit(
319 sizeof(struct thread),
320 thread_max * sizeof(struct thread),
321 THREAD_CHUNK * sizeof(struct thread),
322 "threads");
323
324 lck_grp_attr_setdefault(&thread_lck_grp_attr);
325 lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr);
326 lck_attr_setdefault(&thread_lck_attr);
327
328 stack_init();
329
330 /*
331 * Initialize any machine-dependent
332 * per-thread structures necessary.
333 */
334 machine_thread_init();
335
336 if (!PE_parse_boot_argn("cpumon_ustackshots_trigger_pct", &cpumon_ustackshots_trigger_pct,
337 sizeof (cpumon_ustackshots_trigger_pct))) {
338 cpumon_ustackshots_trigger_pct = CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT;
339 }
340
341 init_thread_ledgers();
342 }
343
/*
 *	thread_terminate_continue:
 *
 *	Continuation passed to thread_block() by
 *	thread_terminate_self().  Running it is treated as a
 *	fatal error.
 */
static void
thread_terminate_continue(void)
{
	panic("thread_terminate_continue");
	/* NOTREACHED */
}
350
351 /*
352 * thread_terminate_self:
353 */
354 void
355 thread_terminate_self(void)
356 {
357 thread_t thread = current_thread();
358
359 task_t task;
360 spl_t s;
361 int threadcnt;
362
363 pal_thread_terminate_self(thread);
364
365 DTRACE_PROC(lwp__exit);
366
367 thread_mtx_lock(thread);
368
369 ipc_thread_disable(thread);
370
371 thread_mtx_unlock(thread);
372
373 s = splsched();
374 thread_lock(thread);
375
376 /*
377 * Cancel priority depression, wait for concurrent expirations
378 * on other processors.
379 */
380 if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
381 thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
382
383 if (timer_call_cancel(&thread->depress_timer))
384 thread->depress_timer_active--;
385 }
386
387 while (thread->depress_timer_active > 0) {
388 thread_unlock(thread);
389 splx(s);
390
391 delay(1);
392
393 s = splsched();
394 thread_lock(thread);
395 }
396
397 thread_sched_call(thread, NULL);
398
399 thread_unlock(thread);
400 splx(s);
401
402 thread_policy_reset(thread);
403
404
405 task = thread->task;
406 uthread_cleanup(task, thread->uthread, task->bsd_info);
407 threadcnt = hw_atomic_sub(&task->active_thread_count, 1);
408
409 /*
410 * If we are the last thread to terminate and the task is
411 * associated with a BSD process, perform BSD process exit.
412 */
413 if (threadcnt == 0 && task->bsd_info != NULL)
414 proc_exit(task->bsd_info);
415
416 uthread_cred_free(thread->uthread);
417
418 s = splsched();
419 thread_lock(thread);
420
421 /*
422 * Cancel wait timer, and wait for
423 * concurrent expirations.
424 */
425 if (thread->wait_timer_is_set) {
426 thread->wait_timer_is_set = FALSE;
427
428 if (timer_call_cancel(&thread->wait_timer))
429 thread->wait_timer_active--;
430 }
431
432 while (thread->wait_timer_active > 0) {
433 thread_unlock(thread);
434 splx(s);
435
436 delay(1);
437
438 s = splsched();
439 thread_lock(thread);
440 }
441
442 /*
443 * If there is a reserved stack, release it.
444 */
445 if (thread->reserved_stack != 0) {
446 stack_free_reserved(thread);
447 thread->reserved_stack = 0;
448 }
449
450 /*
451 * Mark thread as terminating, and block.
452 */
453 thread->state |= TH_TERMINATE;
454 thread_mark_wait_locked(thread, THREAD_UNINT);
455 assert(thread->promotions == 0);
456 assert(thread->rwlock_count == 0);
457 thread_unlock(thread);
458 /* splsched */
459
460 thread_block((thread_continue_t)thread_terminate_continue);
461 /*NOTREACHED*/
462 }
463
464 void
465 thread_deallocate(
466 thread_t thread)
467 {
468 task_t task;
469
470 if (thread == THREAD_NULL)
471 return;
472
473 if (thread_deallocate_internal(thread) > 0)
474 return;
475
476 if(!(thread->state & TH_TERMINATE2))
477 panic("thread_deallocate: thread not properly terminated\n");
478
479 #if KPC
480 kpc_thread_destroy(thread);
481 #endif
482
483
484 ipc_thread_terminate(thread);
485
486 task = thread->task;
487
488 #ifdef MACH_BSD
489 {
490 void *ut = thread->uthread;
491
492 thread->uthread = NULL;
493 uthread_zone_free(ut);
494 }
495 #endif /* MACH_BSD */
496
497 if (thread->t_ledger)
498 ledger_dereference(thread->t_ledger);
499 if (thread->t_threadledger)
500 ledger_dereference(thread->t_threadledger);
501
502 if (thread->kernel_stack != 0)
503 stack_free(thread);
504
505 lck_mtx_destroy(&thread->mutex, &thread_lck_grp);
506 machine_thread_destroy(thread);
507
508 task_deallocate(task);
509
510 zfree(thread_zone, thread);
511 }
512
513 /*
514 * thread_terminate_daemon:
515 *
516 * Perform final clean up for terminating threads.
517 */
518 static void
519 thread_terminate_daemon(void)
520 {
521 thread_t self, thread;
522 task_t task;
523
524 self = current_thread();
525 self->options |= TH_OPT_SYSTEM_CRITICAL;
526
527 (void)splsched();
528 simple_lock(&thread_terminate_lock);
529
530 while ((thread = (thread_t)dequeue_head(&thread_terminate_queue)) != THREAD_NULL) {
531 simple_unlock(&thread_terminate_lock);
532 (void)spllo();
533
534 task = thread->task;
535
536 task_lock(task);
537 task->total_user_time += timer_grab(&thread->user_timer);
538 if (thread->precise_user_kernel_time) {
539 task->total_system_time += timer_grab(&thread->system_timer);
540 } else {
541 task->total_user_time += timer_grab(&thread->system_timer);
542 }
543
544 task->c_switch += thread->c_switch;
545 task->p_switch += thread->p_switch;
546 task->ps_switch += thread->ps_switch;
547
548 task->syscalls_unix += thread->syscalls_unix;
549 task->syscalls_mach += thread->syscalls_mach;
550
551 task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
552 task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
553 queue_remove(&task->threads, thread, thread_t, task_threads);
554 task->thread_count--;
555
556 /*
557 * If the task is being halted, and there is only one thread
558 * left in the task after this one, then wakeup that thread.
559 */
560 if (task->thread_count == 1 && task->halting)
561 thread_wakeup((event_t)&task->halting);
562
563 task_unlock(task);
564
565 lck_mtx_lock(&tasks_threads_lock);
566 queue_remove(&threads, thread, thread_t, threads);
567 threads_count--;
568 lck_mtx_unlock(&tasks_threads_lock);
569
570 thread_deallocate(thread);
571
572 (void)splsched();
573 simple_lock(&thread_terminate_lock);
574 }
575
576 assert_wait((event_t)&thread_terminate_queue, THREAD_UNINT);
577 simple_unlock(&thread_terminate_lock);
578 /* splsched */
579
580 self->options &= ~TH_OPT_SYSTEM_CRITICAL;
581 thread_block((thread_continue_t)thread_terminate_daemon);
582 /*NOTREACHED*/
583 }
584
585 /*
586 * thread_terminate_enqueue:
587 *
588 * Enqueue a terminating thread for final disposition.
589 *
590 * Called at splsched.
591 */
592 void
593 thread_terminate_enqueue(
594 thread_t thread)
595 {
596 simple_lock(&thread_terminate_lock);
597 enqueue_tail(&thread_terminate_queue, (queue_entry_t)thread);
598 simple_unlock(&thread_terminate_lock);
599
600 thread_wakeup((event_t)&thread_terminate_queue);
601 }
602
603 /*
604 * thread_stack_daemon:
605 *
606 * Perform stack allocation as required due to
607 * invoke failures.
608 */
609 static void
610 thread_stack_daemon(void)
611 {
612 thread_t thread;
613 spl_t s;
614
615 s = splsched();
616 simple_lock(&thread_stack_lock);
617
618 while ((thread = (thread_t)dequeue_head(&thread_stack_queue)) != THREAD_NULL) {
619 simple_unlock(&thread_stack_lock);
620 splx(s);
621
622 /* allocate stack with interrupts enabled so that we can call into VM */
623 stack_alloc(thread);
624
625 s = splsched();
626 thread_lock(thread);
627 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
628 thread_unlock(thread);
629
630 simple_lock(&thread_stack_lock);
631 }
632
633 assert_wait((event_t)&thread_stack_queue, THREAD_UNINT);
634 simple_unlock(&thread_stack_lock);
635 splx(s);
636
637 thread_block((thread_continue_t)thread_stack_daemon);
638 /*NOTREACHED*/
639 }
640
641 /*
642 * thread_stack_enqueue:
643 *
644 * Enqueue a thread for stack allocation.
645 *
646 * Called at splsched.
647 */
648 void
649 thread_stack_enqueue(
650 thread_t thread)
651 {
652 simple_lock(&thread_stack_lock);
653 enqueue_tail(&thread_stack_queue, (queue_entry_t)thread);
654 simple_unlock(&thread_stack_lock);
655
656 thread_wakeup((event_t)&thread_stack_queue);
657 }
658
659 void
660 thread_daemon_init(void)
661 {
662 kern_return_t result;
663 thread_t thread = NULL;
664
665 simple_lock_init(&thread_terminate_lock, 0);
666 queue_init(&thread_terminate_queue);
667
668 result = kernel_thread_start_priority((thread_continue_t)thread_terminate_daemon, NULL, MINPRI_KERNEL, &thread);
669 if (result != KERN_SUCCESS)
670 panic("thread_daemon_init: thread_terminate_daemon");
671
672 thread_deallocate(thread);
673
674 simple_lock_init(&thread_stack_lock, 0);
675 queue_init(&thread_stack_queue);
676
677 result = kernel_thread_start_priority((thread_continue_t)thread_stack_daemon, NULL, BASEPRI_PREEMPT, &thread);
678 if (result != KERN_SUCCESS)
679 panic("thread_daemon_init: thread_stack_daemon");
680
681 thread_deallocate(thread);
682 }
683
684 /*
685 * Create a new thread.
686 * Doesn't start the thread running.
687 */
688 static kern_return_t
689 thread_create_internal(
690 task_t parent_task,
691 integer_t priority,
692 thread_continue_t continuation,
693 int options,
694 #define TH_OPTION_NONE 0x00
695 #define TH_OPTION_NOCRED 0x01
696 #define TH_OPTION_NOSUSP 0x02
697 thread_t *out_thread)
698 {
699 thread_t new_thread;
700 static thread_t first_thread;
701
702 /*
703 * Allocate a thread and initialize static fields
704 */
705 if (first_thread == THREAD_NULL)
706 new_thread = first_thread = current_thread();
707 else
708 new_thread = (thread_t)zalloc(thread_zone);
709 if (new_thread == THREAD_NULL)
710 return (KERN_RESOURCE_SHORTAGE);
711
712 if (new_thread != first_thread)
713 *new_thread = thread_template;
714
715 #ifdef MACH_BSD
716 new_thread->uthread = uthread_alloc(parent_task, new_thread, (options & TH_OPTION_NOCRED) != 0);
717 if (new_thread->uthread == NULL) {
718 zfree(thread_zone, new_thread);
719 return (KERN_RESOURCE_SHORTAGE);
720 }
721 #endif /* MACH_BSD */
722
723 if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
724 #ifdef MACH_BSD
725 void *ut = new_thread->uthread;
726
727 new_thread->uthread = NULL;
728 /* cred free may not be necessary */
729 uthread_cleanup(parent_task, ut, parent_task->bsd_info);
730 uthread_cred_free(ut);
731 uthread_zone_free(ut);
732 #endif /* MACH_BSD */
733
734 zfree(thread_zone, new_thread);
735 return (KERN_FAILURE);
736 }
737
738 new_thread->task = parent_task;
739
740 thread_lock_init(new_thread);
741 wake_lock_init(new_thread);
742
743 lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr);
744
745 ipc_thread_init(new_thread);
746
747 new_thread->continuation = continuation;
748
749 lck_mtx_lock(&tasks_threads_lock);
750 task_lock(parent_task);
751
752 if ( !parent_task->active || parent_task->halting ||
753 ((options & TH_OPTION_NOSUSP) != 0 &&
754 parent_task->suspend_count > 0) ||
755 (parent_task->thread_count >= task_threadmax &&
756 parent_task != kernel_task) ) {
757 task_unlock(parent_task);
758 lck_mtx_unlock(&tasks_threads_lock);
759
760 #ifdef MACH_BSD
761 {
762 void *ut = new_thread->uthread;
763
764 new_thread->uthread = NULL;
765 uthread_cleanup(parent_task, ut, parent_task->bsd_info);
766 /* cred free may not be necessary */
767 uthread_cred_free(ut);
768 uthread_zone_free(ut);
769 }
770 #endif /* MACH_BSD */
771 ipc_thread_disable(new_thread);
772 ipc_thread_terminate(new_thread);
773 lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp);
774 machine_thread_destroy(new_thread);
775 zfree(thread_zone, new_thread);
776 return (KERN_FAILURE);
777 }
778
779 /* New threads inherit any default state on the task */
780 machine_thread_inherit_taskwide(new_thread, parent_task);
781
782 task_reference_internal(parent_task);
783
784 if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
785 /*
786 * This task has a per-thread CPU limit; make sure this new thread
787 * gets its limit set too, before it gets out of the kernel.
788 */
789 set_astledger(new_thread);
790 }
791 new_thread->t_threadledger = LEDGER_NULL; /* per thread ledger is not inherited */
792 new_thread->t_ledger = new_thread->task->ledger;
793 if (new_thread->t_ledger)
794 ledger_reference(new_thread->t_ledger);
795
796 /* Cache the task's map */
797 new_thread->map = parent_task->map;
798
799 /* Chain the thread onto the task's list */
800 queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
801 parent_task->thread_count++;
802
803 /* So terminating threads don't need to take the task lock to decrement */
804 hw_atomic_add(&parent_task->active_thread_count, 1);
805
806 /* Protected by the tasks_threads_lock */
807 new_thread->thread_id = ++thread_unique_id;
808
809 queue_enter(&threads, new_thread, thread_t, threads);
810 threads_count++;
811
812 timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
813 timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);
814
815 #if CONFIG_COUNTERS
816 /*
817 * If parent task has any reservations, they need to be propagated to this
818 * thread.
819 */
820 new_thread->t_chud = (TASK_PMC_FLAG == (parent_task->t_chud & TASK_PMC_FLAG)) ?
821 THREAD_PMC_FLAG : 0U;
822 #endif
823 #if KPC
824 kpc_thread_create(new_thread);
825 #endif
826
827 /* Only need to update policies pushed from task to thread */
828 new_thread->requested_policy.bg_iotier = parent_task->effective_policy.bg_iotier;
829 new_thread->requested_policy.terminated = parent_task->effective_policy.terminated;
830
831 /* Set the thread's scheduling parameters */
832 new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
833 new_thread->sched_flags = 0;
834 new_thread->max_priority = parent_task->max_priority;
835 new_thread->task_priority = parent_task->priority;
836 new_thread->priority = (priority < 0)? parent_task->priority: priority;
837 if (new_thread->priority > new_thread->max_priority)
838 new_thread->priority = new_thread->max_priority;
839 new_thread->importance = new_thread->priority - new_thread->task_priority;
840 new_thread->saved_importance = new_thread->importance;
841
842 #if defined(CONFIG_SCHED_TRADITIONAL)
843 new_thread->sched_stamp = sched_tick;
844 new_thread->pri_shift = sched_pri_shift;
845 #endif
846 SCHED(compute_priority)(new_thread, FALSE);
847
848 new_thread->active = TRUE;
849
850 *out_thread = new_thread;
851
852 {
853 long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;
854
855 kdbg_trace_data(parent_task->bsd_info, &dbg_arg2);
856
857 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
858 TRACEDBG_CODE(DBG_TRACE_DATA, 1) | DBG_FUNC_NONE,
859 (vm_address_t)(uintptr_t)thread_tid(new_thread), dbg_arg2, 0, 0, 0);
860
861 kdbg_trace_string(parent_task->bsd_info,
862 &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
863
864 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
865 TRACEDBG_CODE(DBG_TRACE_STRING, 1) | DBG_FUNC_NONE,
866 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
867 }
868
869 DTRACE_PROC1(lwp__create, thread_t, *out_thread);
870
871 return (KERN_SUCCESS);
872 }
873
874 static kern_return_t
875 thread_create_internal2(
876 task_t task,
877 thread_t *new_thread,
878 boolean_t from_user)
879 {
880 kern_return_t result;
881 thread_t thread;
882
883 if (task == TASK_NULL || task == kernel_task)
884 return (KERN_INVALID_ARGUMENT);
885
886 result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, TH_OPTION_NONE, &thread);
887 if (result != KERN_SUCCESS)
888 return (result);
889
890 thread->user_stop_count = 1;
891 thread_hold(thread);
892 if (task->suspend_count > 0)
893 thread_hold(thread);
894
895 if (from_user)
896 extmod_statistics_incr_thread_create(task);
897
898 task_unlock(task);
899 lck_mtx_unlock(&tasks_threads_lock);
900
901 *new_thread = thread;
902
903 return (KERN_SUCCESS);
904 }
905
906 /* No prototype, since task_server.h has the _from_user version if KERNEL_SERVER */
907 kern_return_t
908 thread_create(
909 task_t task,
910 thread_t *new_thread);
911
912 kern_return_t
913 thread_create(
914 task_t task,
915 thread_t *new_thread)
916 {
917 return thread_create_internal2(task, new_thread, FALSE);
918 }
919
920 kern_return_t
921 thread_create_from_user(
922 task_t task,
923 thread_t *new_thread)
924 {
925 return thread_create_internal2(task, new_thread, TRUE);
926 }
927
928 static kern_return_t
929 thread_create_running_internal2(
930 register task_t task,
931 int flavor,
932 thread_state_t new_state,
933 mach_msg_type_number_t new_state_count,
934 thread_t *new_thread,
935 boolean_t from_user)
936 {
937 register kern_return_t result;
938 thread_t thread;
939
940 if (task == TASK_NULL || task == kernel_task)
941 return (KERN_INVALID_ARGUMENT);
942
943 result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, TH_OPTION_NONE, &thread);
944 if (result != KERN_SUCCESS)
945 return (result);
946
947 result = machine_thread_set_state(
948 thread, flavor, new_state, new_state_count);
949 if (result != KERN_SUCCESS) {
950 task_unlock(task);
951 lck_mtx_unlock(&tasks_threads_lock);
952
953 thread_terminate(thread);
954 thread_deallocate(thread);
955 return (result);
956 }
957
958 thread_mtx_lock(thread);
959 thread_start_internal(thread);
960 thread_mtx_unlock(thread);
961
962 if (from_user)
963 extmod_statistics_incr_thread_create(task);
964
965 task_unlock(task);
966 lck_mtx_unlock(&tasks_threads_lock);
967
968 *new_thread = thread;
969
970 return (result);
971 }
972
973 /* Prototype, see justification above */
974 kern_return_t
975 thread_create_running(
976 register task_t task,
977 int flavor,
978 thread_state_t new_state,
979 mach_msg_type_number_t new_state_count,
980 thread_t *new_thread);
981
982 kern_return_t
983 thread_create_running(
984 register task_t task,
985 int flavor,
986 thread_state_t new_state,
987 mach_msg_type_number_t new_state_count,
988 thread_t *new_thread)
989 {
990 return thread_create_running_internal2(
991 task, flavor, new_state, new_state_count,
992 new_thread, FALSE);
993 }
994
995 kern_return_t
996 thread_create_running_from_user(
997 register task_t task,
998 int flavor,
999 thread_state_t new_state,
1000 mach_msg_type_number_t new_state_count,
1001 thread_t *new_thread)
1002 {
1003 return thread_create_running_internal2(
1004 task, flavor, new_state, new_state_count,
1005 new_thread, TRUE);
1006 }
1007
1008 kern_return_t
1009 thread_create_workq(
1010 task_t task,
1011 thread_continue_t thread_return,
1012 thread_t *new_thread)
1013 {
1014 kern_return_t result;
1015 thread_t thread;
1016
1017 if (task == TASK_NULL || task == kernel_task)
1018 return (KERN_INVALID_ARGUMENT);
1019
1020 result = thread_create_internal(task, -1, thread_return, TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread);
1021 if (result != KERN_SUCCESS)
1022 return (result);
1023
1024 thread->user_stop_count = 1;
1025 thread_hold(thread);
1026 if (task->suspend_count > 0)
1027 thread_hold(thread);
1028
1029 task_unlock(task);
1030 lck_mtx_unlock(&tasks_threads_lock);
1031
1032 *new_thread = thread;
1033
1034 return (KERN_SUCCESS);
1035 }
1036
1037 /*
1038 * kernel_thread_create:
1039 *
1040 * Create a thread in the kernel task
1041 * to execute in kernel context.
1042 */
1043 kern_return_t
1044 kernel_thread_create(
1045 thread_continue_t continuation,
1046 void *parameter,
1047 integer_t priority,
1048 thread_t *new_thread)
1049 {
1050 kern_return_t result;
1051 thread_t thread;
1052 task_t task = kernel_task;
1053
1054 result = thread_create_internal(task, priority, continuation, TH_OPTION_NONE, &thread);
1055 if (result != KERN_SUCCESS)
1056 return (result);
1057
1058 task_unlock(task);
1059 lck_mtx_unlock(&tasks_threads_lock);
1060
1061 stack_alloc(thread);
1062 assert(thread->kernel_stack != 0);
1063 thread->reserved_stack = thread->kernel_stack;
1064
1065 thread->parameter = parameter;
1066
1067 if(debug_task & 1)
1068 kprintf("kernel_thread_create: thread = %p continuation = %p\n", thread, continuation);
1069 *new_thread = thread;
1070
1071 return (result);
1072 }
1073
1074 kern_return_t
1075 kernel_thread_start_priority(
1076 thread_continue_t continuation,
1077 void *parameter,
1078 integer_t priority,
1079 thread_t *new_thread)
1080 {
1081 kern_return_t result;
1082 thread_t thread;
1083
1084 result = kernel_thread_create(continuation, parameter, priority, &thread);
1085 if (result != KERN_SUCCESS)
1086 return (result);
1087
1088 *new_thread = thread;
1089
1090 thread_mtx_lock(thread);
1091 thread_start_internal(thread);
1092 thread_mtx_unlock(thread);
1093
1094 return (result);
1095 }
1096
1097 kern_return_t
1098 kernel_thread_start(
1099 thread_continue_t continuation,
1100 void *parameter,
1101 thread_t *new_thread)
1102 {
1103 return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
1104 }
1105
1106
1107 kern_return_t
1108 thread_info_internal(
1109 register thread_t thread,
1110 thread_flavor_t flavor,
1111 thread_info_t thread_info_out, /* ptr to OUT array */
1112 mach_msg_type_number_t *thread_info_count) /*IN/OUT*/
1113 {
1114 int state, flags;
1115 spl_t s;
1116
1117 if (thread == THREAD_NULL)
1118 return (KERN_INVALID_ARGUMENT);
1119
1120 if (flavor == THREAD_BASIC_INFO) {
1121 register thread_basic_info_t basic_info;
1122
1123 if (*thread_info_count < THREAD_BASIC_INFO_COUNT)
1124 return (KERN_INVALID_ARGUMENT);
1125
1126 basic_info = (thread_basic_info_t) thread_info_out;
1127
1128 s = splsched();
1129 thread_lock(thread);
1130
1131 /* fill in info */
1132
1133 thread_read_times(thread, &basic_info->user_time,
1134 &basic_info->system_time);
1135
1136 /*
1137 * Update lazy-evaluated scheduler info because someone wants it.
1138 */
1139 if (SCHED(can_update_priority)(thread))
1140 SCHED(update_priority)(thread);
1141
1142 basic_info->sleep_time = 0;
1143
1144 /*
1145 * To calculate cpu_usage, first correct for timer rate,
1146 * then for 5/8 ageing. The correction factor [3/5] is
1147 * (1/(5/8) - 1).
1148 */
1149 basic_info->cpu_usage = 0;
1150 #if defined(CONFIG_SCHED_TRADITIONAL)
1151 if (sched_tick_interval) {
1152 basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
1153 * TH_USAGE_SCALE) / sched_tick_interval);
1154 basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
1155 }
1156 #endif
1157
1158 if (basic_info->cpu_usage > TH_USAGE_SCALE)
1159 basic_info->cpu_usage = TH_USAGE_SCALE;
1160
1161 basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
1162 POLICY_TIMESHARE: POLICY_RR);
1163
1164 flags = 0;
1165 if (thread->options & TH_OPT_IDLE_THREAD)
1166 flags |= TH_FLAGS_IDLE;
1167
1168 if (!thread->kernel_stack)
1169 flags |= TH_FLAGS_SWAPPED;
1170
1171 state = 0;
1172 if (thread->state & TH_TERMINATE)
1173 state = TH_STATE_HALTED;
1174 else
1175 if (thread->state & TH_RUN)
1176 state = TH_STATE_RUNNING;
1177 else
1178 if (thread->state & TH_UNINT)
1179 state = TH_STATE_UNINTERRUPTIBLE;
1180 else
1181 if (thread->state & TH_SUSP)
1182 state = TH_STATE_STOPPED;
1183 else
1184 if (thread->state & TH_WAIT)
1185 state = TH_STATE_WAITING;
1186
1187 basic_info->run_state = state;
1188 basic_info->flags = flags;
1189
1190 basic_info->suspend_count = thread->user_stop_count;
1191
1192 thread_unlock(thread);
1193 splx(s);
1194
1195 *thread_info_count = THREAD_BASIC_INFO_COUNT;
1196
1197 return (KERN_SUCCESS);
1198 }
1199 else
1200 if (flavor == THREAD_IDENTIFIER_INFO) {
1201 register thread_identifier_info_t identifier_info;
1202
1203 if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT)
1204 return (KERN_INVALID_ARGUMENT);
1205
1206 identifier_info = (thread_identifier_info_t) thread_info_out;
1207
1208 s = splsched();
1209 thread_lock(thread);
1210
1211 identifier_info->thread_id = thread->thread_id;
1212 identifier_info->thread_handle = thread->machine.cthread_self;
1213 if(thread->task->bsd_info) {
1214 identifier_info->dispatch_qaddr = identifier_info->thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
1215 } else {
1216 thread_unlock(thread);
1217 splx(s);
1218 return KERN_INVALID_ARGUMENT;
1219 }
1220
1221 thread_unlock(thread);
1222 splx(s);
1223 return KERN_SUCCESS;
1224 }
1225 else
1226 if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
1227 policy_timeshare_info_t ts_info;
1228
1229 if (*thread_info_count < POLICY_TIMESHARE_INFO_COUNT)
1230 return (KERN_INVALID_ARGUMENT);
1231
1232 ts_info = (policy_timeshare_info_t)thread_info_out;
1233
1234 s = splsched();
1235 thread_lock(thread);
1236
1237 if (thread->sched_mode != TH_MODE_TIMESHARE) {
1238 thread_unlock(thread);
1239 splx(s);
1240
1241 return (KERN_INVALID_POLICY);
1242 }
1243
1244 ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
1245 if (ts_info->depressed) {
1246 ts_info->base_priority = DEPRESSPRI;
1247 ts_info->depress_priority = thread->priority;
1248 }
1249 else {
1250 ts_info->base_priority = thread->priority;
1251 ts_info->depress_priority = -1;
1252 }
1253
1254 ts_info->cur_priority = thread->sched_pri;
1255 ts_info->max_priority = thread->max_priority;
1256
1257 thread_unlock(thread);
1258 splx(s);
1259
1260 *thread_info_count = POLICY_TIMESHARE_INFO_COUNT;
1261
1262 return (KERN_SUCCESS);
1263 }
1264 else
1265 if (flavor == THREAD_SCHED_FIFO_INFO) {
1266 if (*thread_info_count < POLICY_FIFO_INFO_COUNT)
1267 return (KERN_INVALID_ARGUMENT);
1268
1269 return (KERN_INVALID_POLICY);
1270 }
1271 else
1272 if (flavor == THREAD_SCHED_RR_INFO) {
1273 policy_rr_info_t rr_info;
1274 uint32_t quantum_time;
1275 uint64_t quantum_ns;
1276
1277 if (*thread_info_count < POLICY_RR_INFO_COUNT)
1278 return (KERN_INVALID_ARGUMENT);
1279
1280 rr_info = (policy_rr_info_t) thread_info_out;
1281
1282 s = splsched();
1283 thread_lock(thread);
1284
1285 if (thread->sched_mode == TH_MODE_TIMESHARE) {
1286 thread_unlock(thread);
1287 splx(s);
1288
1289 return (KERN_INVALID_POLICY);
1290 }
1291
1292 rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
1293 if (rr_info->depressed) {
1294 rr_info->base_priority = DEPRESSPRI;
1295 rr_info->depress_priority = thread->priority;
1296 }
1297 else {
1298 rr_info->base_priority = thread->priority;
1299 rr_info->depress_priority = -1;
1300 }
1301
1302 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
1303 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
1304
1305 rr_info->max_priority = thread->max_priority;
1306 rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
1307
1308 thread_unlock(thread);
1309 splx(s);
1310
1311 *thread_info_count = POLICY_RR_INFO_COUNT;
1312
1313 return (KERN_SUCCESS);
1314 }
1315
1316 return (KERN_INVALID_ARGUMENT);
1317 }
1318
1319 void
1320 thread_read_times(
1321 thread_t thread,
1322 time_value_t *user_time,
1323 time_value_t *system_time)
1324 {
1325 clock_sec_t secs;
1326 clock_usec_t usecs;
1327 uint64_t tval_user, tval_system;
1328
1329 tval_user = timer_grab(&thread->user_timer);
1330 tval_system = timer_grab(&thread->system_timer);
1331
1332 if (thread->precise_user_kernel_time) {
1333 absolutetime_to_microtime(tval_user, &secs, &usecs);
1334 user_time->seconds = (typeof(user_time->seconds))secs;
1335 user_time->microseconds = usecs;
1336
1337 absolutetime_to_microtime(tval_system, &secs, &usecs);
1338 system_time->seconds = (typeof(system_time->seconds))secs;
1339 system_time->microseconds = usecs;
1340 } else {
1341 /* system_timer may represent either sys or user */
1342 tval_user += tval_system;
1343 absolutetime_to_microtime(tval_user, &secs, &usecs);
1344 user_time->seconds = (typeof(user_time->seconds))secs;
1345 user_time->microseconds = usecs;
1346
1347 system_time->seconds = 0;
1348 system_time->microseconds = 0;
1349 }
1350 }
1351
1352 kern_return_t
1353 thread_assign(
1354 __unused thread_t thread,
1355 __unused processor_set_t new_pset)
1356 {
1357 return (KERN_FAILURE);
1358 }
1359
1360 /*
1361 * thread_assign_default:
1362 *
1363 * Special version of thread_assign for assigning threads to default
1364 * processor set.
1365 */
1366 kern_return_t
1367 thread_assign_default(
1368 thread_t thread)
1369 {
1370 return (thread_assign(thread, &pset0));
1371 }
1372
1373 /*
1374 * thread_get_assignment
1375 *
1376 * Return current assignment for this thread.
1377 */
1378 kern_return_t
1379 thread_get_assignment(
1380 thread_t thread,
1381 processor_set_t *pset)
1382 {
1383 if (thread == NULL)
1384 return (KERN_INVALID_ARGUMENT);
1385
1386 *pset = &pset0;
1387
1388 return (KERN_SUCCESS);
1389 }
1390
1391 /*
1392 * thread_wire_internal:
1393 *
1394 * Specify that the target thread must always be able
1395 * to run and to allocate memory.
1396 */
1397 kern_return_t
1398 thread_wire_internal(
1399 host_priv_t host_priv,
1400 thread_t thread,
1401 boolean_t wired,
1402 boolean_t *prev_state)
1403 {
1404 if (host_priv == NULL || thread != current_thread())
1405 return (KERN_INVALID_ARGUMENT);
1406
1407 assert(host_priv == &realhost);
1408
1409 if (prev_state)
1410 *prev_state = (thread->options & TH_OPT_VMPRIV) != 0;
1411
1412 if (wired) {
1413 if (!(thread->options & TH_OPT_VMPRIV))
1414 vm_page_free_reserve(1); /* XXX */
1415 thread->options |= TH_OPT_VMPRIV;
1416 }
1417 else {
1418 if (thread->options & TH_OPT_VMPRIV)
1419 vm_page_free_reserve(-1); /* XXX */
1420 thread->options &= ~TH_OPT_VMPRIV;
1421 }
1422
1423 return (KERN_SUCCESS);
1424 }
1425
1426
1427 /*
1428 * thread_wire:
1429 *
1430 * User-api wrapper for thread_wire_internal()
1431 */
1432 kern_return_t
1433 thread_wire(
1434 host_priv_t host_priv,
1435 thread_t thread,
1436 boolean_t wired)
1437 {
1438 return (thread_wire_internal(host_priv, thread, wired, NULL));
1439 }
1440
1441
1442 /*
1443 * XXX assuming current thread only, for now...
1444 */
1445 void
1446 thread_guard_violation(thread_t thread, unsigned type)
1447 {
1448 assert(thread == current_thread());
1449
1450 spl_t s = splsched();
1451 /*
1452 * Use the saved state area of the thread structure
1453 * to store all info required to handle the AST when
1454 * returning to userspace
1455 */
1456 thread->guard_exc_info.type = type;
1457 thread_ast_set(thread, AST_GUARD);
1458 ast_propagate(thread->ast);
1459
1460 splx(s);
1461 }
1462
1463 /*
1464 * guard_ast:
1465 *
1466 * Handle AST_GUARD for a thread. This routine looks at the
1467 * state saved in the thread structure to determine the cause
1468 * of this exception. Based on this value, it invokes the
1469 * appropriate routine which determines other exception related
1470 * info and raises the exception.
1471 */
1472 void
1473 guard_ast(thread_t thread)
1474 {
1475 if (thread->guard_exc_info.type == GUARD_TYPE_MACH_PORT)
1476 mach_port_guard_ast(thread);
1477 else
1478 fd_guard_ast(thread);
1479 }
1480
1481 static void
1482 thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1)
1483 {
1484 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
1485 #if CONFIG_TELEMETRY
1486 /*
1487 * This thread is in danger of violating the CPU usage monitor. Enable telemetry
1488 * on the entire task so there are micro-stackshots available if and when
1489 * EXC_RESOURCE is triggered. We could have chosen to enable micro-stackshots
1490 * for this thread only; but now that this task is suspect, knowing what all of
1491 * its threads are up to will be useful.
1492 */
1493 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1);
1494 #endif
1495 return;
1496 }
1497
1498 #if CONFIG_TELEMETRY
1499 /*
1500 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
1501 * exceeded the limit, turn telemetry off for the task.
1502 */
1503 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0);
1504 #endif
1505
1506 if (warning == 0) {
1507 THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE();
1508 }
1509 }
1510
1511 void __attribute__((noinline))
1512 THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(void)
1513 {
1514 int pid = 0;
1515 task_t task = current_task();
1516 thread_t thread = current_thread();
1517 uint64_t tid = thread->thread_id;
1518 char *procname = (char *) "unknown";
1519 time_value_t thread_total_time = {0, 0};
1520 time_value_t thread_system_time;
1521 time_value_t thread_user_time;
1522 int action;
1523 uint8_t percentage;
1524 uint32_t limit_percent;
1525 uint32_t usage_percent;
1526 uint32_t interval_sec;
1527 uint64_t interval_ns;
1528 uint64_t balance_ns;
1529 boolean_t fatal = FALSE;
1530
1531 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1532 struct ledger_entry_info lei;
1533
1534 assert(thread->t_threadledger != LEDGER_NULL);
1535
1536 /*
1537 * Now that a thread has tripped the monitor, disable it for the entire task.
1538 */
1539 task_lock(task);
1540 if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) {
1541 /*
1542 * The CPU usage monitor has been disabled on our task, so some other
1543 * thread must have gotten here first. We only send one exception per
1544 * task lifetime, so there's nothing left for us to do here.
1545 */
1546 task_unlock(task);
1547 return;
1548 }
1549 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
1550 fatal = TRUE;
1551 }
1552 task_disable_cpumon(task);
1553 task_unlock(task);
1554
1555 #ifdef MACH_BSD
1556 pid = proc_selfpid();
1557 if (task->bsd_info != NULL)
1558 procname = proc_name_address(task->bsd_info);
1559 #endif
1560
1561 thread_get_cpulimit(&action, &percentage, &interval_ns);
1562
1563 interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);
1564
1565 thread_read_times(thread, &thread_user_time, &thread_system_time);
1566 time_value_add(&thread_total_time, &thread_user_time);
1567 time_value_add(&thread_total_time, &thread_system_time);
1568
1569 ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);
1570
1571 absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
1572 usage_percent = (uint32_t) ((balance_ns * 100ULL) / lei.lei_last_refill);
1573
1574 /* Show refill period in the same units as balance, limit, etc */
1575 nanoseconds_to_absolutetime(lei.lei_refill_period, &lei.lei_refill_period);
1576
1577 limit_percent = (uint32_t) ((lei.lei_limit * 100ULL) / lei.lei_refill_period);
1578
1579 /* TODO: show task total runtime as well? see TASK_ABSOLUTETIME_INFO */
1580
1581 if (disable_exc_resource) {
1582 printf("process %s[%d] thread %llu caught burning CPU!; EXC_RESOURCE "
1583 "supressed by a boot-arg\n", procname, pid, tid);
1584 return;
1585 }
1586
1587 printf("process %s[%d] thread %llu caught burning CPU! "
1588 "It used more than %d%% CPU (Actual recent usage: %d%%) over %d seconds. "
1589 "thread lifetime cpu usage %d.%06d seconds, (%d.%06d user, %d.%06d system) "
1590 "ledger info: balance: %lld credit: %lld debit: %lld limit: %llu (%d%%) "
1591 "period: %llu time since last refill (ns): %llu \n",
1592 procname, pid, tid,
1593 percentage, usage_percent, interval_sec,
1594 thread_total_time.seconds, thread_total_time.microseconds,
1595 thread_user_time.seconds, thread_user_time.microseconds,
1596 thread_system_time.seconds, thread_system_time.microseconds,
1597 lei.lei_balance,
1598 lei.lei_credit, lei.lei_debit,
1599 lei.lei_limit, limit_percent,
1600 lei.lei_refill_period, lei.lei_last_refill);
1601
1602
1603 code[0] = code[1] = 0;
1604 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU);
1605 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR);
1606 EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec);
1607 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], limit_percent);
1608 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent);
1609 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
1610
1611 if (fatal) {
1612 task_terminate_internal(task);
1613 }
1614 }
1615
1616 void
1617 init_thread_ledgers(void) {
1618 ledger_template_t t;
1619 int idx;
1620
1621 assert(thread_ledger_template == NULL);
1622
1623 if ((t = ledger_template_create("Per-thread ledger")) == NULL)
1624 panic("couldn't create thread ledger template");
1625
1626 if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
1627 panic("couldn't create cpu_time entry for thread ledger template");
1628 }
1629
1630 if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) {
1631 panic("couldn't set thread ledger callback for cpu_time entry");
1632 }
1633
1634 thread_ledgers.cpu_time = idx;
1635 thread_ledger_template = t;
1636 }
1637
1638 /*
1639 * Returns currently applied CPU usage limit, or 0/0 if none is applied.
1640 */
1641 int
1642 thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns)
1643 {
1644 int64_t abstime = 0;
1645 uint64_t limittime = 0;
1646 thread_t thread = current_thread();
1647
1648 *percentage = 0;
1649 *interval_ns = 0;
1650 *action = 0;
1651
1652 if (thread->t_threadledger == LEDGER_NULL) {
1653 /*
1654 * This thread has no per-thread ledger, so it can't possibly
1655 * have a CPU limit applied.
1656 */
1657 return (KERN_SUCCESS);
1658 }
1659
1660 ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns);
1661 ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime);
1662
1663 if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) {
1664 /*
1665 * This thread's CPU time ledger has no period or limit; so it
1666 * doesn't have a CPU limit applied.
1667 */
1668 return (KERN_SUCCESS);
1669 }
1670
1671 /*
1672 * This calculation is the converse to the one in thread_set_cpulimit().
1673 */
1674 absolutetime_to_nanoseconds(abstime, &limittime);
1675 *percentage = (limittime * 100ULL) / *interval_ns;
1676 assert(*percentage <= 100);
1677
1678 if (thread->options & TH_OPT_PROC_CPULIMIT) {
1679 assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0);
1680
1681 *action = THREAD_CPULIMIT_BLOCK;
1682 } else if (thread->options & TH_OPT_PRVT_CPULIMIT) {
1683 assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0);
1684
1685 *action = THREAD_CPULIMIT_EXCEPTION;
1686 } else {
1687 *action = THREAD_CPULIMIT_DISABLE;
1688 }
1689
1690 return (KERN_SUCCESS);
1691 }
1692
1693 /*
1694 * Set CPU usage limit on a thread.
1695 *
1696 * Calling with percentage of 0 will unset the limit for this thread.
1697 */
1698 int
1699 thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
1700 {
1701 thread_t thread = current_thread();
1702 ledger_t l;
1703 uint64_t limittime = 0;
1704 uint64_t abstime = 0;
1705
1706 assert(percentage <= 100);
1707
1708 if (action == THREAD_CPULIMIT_DISABLE) {
1709 /*
1710 * Remove CPU limit, if any exists.
1711 */
1712 if (thread->t_threadledger != LEDGER_NULL) {
1713 l = thread->t_threadledger;
1714 /*
1715 * The only way to get a per-thread ledger is via CPU limits.
1716 */
1717 assert(thread->options & (TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT));
1718 thread->t_threadledger = NULL;
1719 ledger_dereference(l);
1720 thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT);
1721 }
1722
1723 return (0);
1724 }
1725
1726 if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) {
1727 return (KERN_INVALID_ARGUMENT);
1728 }
1729
1730 l = thread->t_threadledger;
1731 if (l == LEDGER_NULL) {
1732 /*
1733 * This thread doesn't yet have a per-thread ledger; so create one with the CPU time entry active.
1734 */
1735 if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL)
1736 return (KERN_RESOURCE_SHORTAGE);
1737
1738 /*
1739 * We are the first to create this thread's ledger, so only activate our entry.
1740 */
1741 ledger_entry_setactive(l, thread_ledgers.cpu_time);
1742 thread->t_threadledger = l;
1743 }
1744
1745 /*
1746 * The limit is specified as a percentage of CPU over an interval in nanoseconds.
1747 * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
1748 */
1749 limittime = (interval_ns * percentage) / 100;
1750 nanoseconds_to_absolutetime(limittime, &abstime);
1751 ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct);
1752 /*
1753 * Refill the thread's allotted CPU time every interval_ns nanoseconds.
1754 */
1755 ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);
1756
1757 if (action == THREAD_CPULIMIT_EXCEPTION) {
1758 /*
1759 * We don't support programming the CPU usage monitor on a task if any of its
1760 * threads have a per-thread blocking CPU limit configured.
1761 */
1762 if (thread->options & TH_OPT_PRVT_CPULIMIT) {
1763 panic("CPU usage monitor activated, but blocking thread limit exists");
1764 }
1765
1766 /*
1767 * Make a note that this thread's CPU limit is being used for the task-wide CPU
1768 * usage monitor. We don't have to arm the callback which will trigger the
1769 * exception, because that was done for us in ledger_instantiate (because the
1770 * ledger template used has a default callback).
1771 */
1772 thread->options |= TH_OPT_PROC_CPULIMIT;
1773 } else {
1774 /*
1775 * We deliberately override any CPU limit imposed by a task-wide limit (eg
1776 * CPU usage monitor).
1777 */
1778 thread->options &= ~TH_OPT_PROC_CPULIMIT;
1779
1780 thread->options |= TH_OPT_PRVT_CPULIMIT;
1781 /* The per-thread ledger template by default has a callback for CPU time */
1782 ledger_disable_callback(l, thread_ledgers.cpu_time);
1783 ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
1784 }
1785
1786 return (0);
1787 }
1788
1789 int split_funnel_off = 0;
1790 lck_grp_t *funnel_lck_grp = LCK_GRP_NULL;
1791 lck_grp_attr_t *funnel_lck_grp_attr;
1792 lck_attr_t *funnel_lck_attr;
1793
1794 funnel_t *
1795 funnel_alloc(
1796 int type)
1797 {
1798 lck_mtx_t *m;
1799 funnel_t *fnl;
1800
1801 if (funnel_lck_grp == LCK_GRP_NULL) {
1802 funnel_lck_grp_attr = lck_grp_attr_alloc_init();
1803
1804 funnel_lck_grp = lck_grp_alloc_init("Funnel", funnel_lck_grp_attr);
1805
1806 funnel_lck_attr = lck_attr_alloc_init();
1807 }
1808 if ((fnl = (funnel_t *)kalloc(sizeof(funnel_t))) != 0){
1809 bzero((void *)fnl, sizeof(funnel_t));
1810 if ((m = lck_mtx_alloc_init(funnel_lck_grp, funnel_lck_attr)) == (lck_mtx_t *)NULL) {
1811 kfree(fnl, sizeof(funnel_t));
1812 return(THR_FUNNEL_NULL);
1813 }
1814 fnl->fnl_mutex = m;
1815 fnl->fnl_type = type;
1816 }
1817 return(fnl);
1818 }
1819
1820 void
1821 funnel_free(
1822 funnel_t * fnl)
1823 {
1824 lck_mtx_free(fnl->fnl_mutex, funnel_lck_grp);
1825 if (fnl->fnl_oldmutex)
1826 lck_mtx_free(fnl->fnl_oldmutex, funnel_lck_grp);
1827 kfree(fnl, sizeof(funnel_t));
1828 }
1829
1830 void
1831 funnel_lock(
1832 funnel_t * fnl)
1833 {
1834 lck_mtx_lock(fnl->fnl_mutex);
1835 fnl->fnl_mtxholder = current_thread();
1836 }
1837
1838 void
1839 funnel_unlock(
1840 funnel_t * fnl)
1841 {
1842 lck_mtx_unlock(fnl->fnl_mutex);
1843 fnl->fnl_mtxholder = NULL;
1844 fnl->fnl_mtxrelease = current_thread();
1845 }
1846
1847 funnel_t *
1848 thread_funnel_get(
1849 void)
1850 {
1851 thread_t th = current_thread();
1852
1853 if (th->funnel_state & TH_FN_OWNED) {
1854 return(th->funnel_lock);
1855 }
1856 return(THR_FUNNEL_NULL);
1857 }
1858
1859 boolean_t
1860 thread_funnel_set(
1861 funnel_t * fnl,
1862 boolean_t funneled)
1863 {
1864 thread_t cur_thread;
1865 boolean_t funnel_state_prev;
1866 boolean_t intr;
1867
1868 cur_thread = current_thread();
1869 funnel_state_prev = ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED);
1870
1871 if (funnel_state_prev != funneled) {
1872 intr = ml_set_interrupts_enabled(FALSE);
1873
1874 if (funneled == TRUE) {
1875 if (cur_thread->funnel_lock)
1876 panic("Funnel lock called when holding one %p", cur_thread->funnel_lock);
1877 KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE,
1878 fnl, 1, 0, 0, 0);
1879 funnel_lock(fnl);
1880 KERNEL_DEBUG(0x6032434 | DBG_FUNC_NONE,
1881 fnl, 1, 0, 0, 0);
1882 cur_thread->funnel_state |= TH_FN_OWNED;
1883 cur_thread->funnel_lock = fnl;
1884 } else {
1885 if(cur_thread->funnel_lock->fnl_mutex != fnl->fnl_mutex)
1886 panic("Funnel unlock when not holding funnel");
1887 cur_thread->funnel_state &= ~TH_FN_OWNED;
1888 KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE,
1889 fnl, 1, 0, 0, 0);
1890
1891 cur_thread->funnel_lock = THR_FUNNEL_NULL;
1892 funnel_unlock(fnl);
1893 }
1894 (void)ml_set_interrupts_enabled(intr);
1895 } else {
1896 /* if we are trying to acquire funnel recursively
1897 * check for funnel to be held already
1898 */
1899 if (funneled && (fnl->fnl_mutex != cur_thread->funnel_lock->fnl_mutex)) {
1900 panic("thread_funnel_set: already holding a different funnel");
1901 }
1902 }
1903 return(funnel_state_prev);
1904 }
1905
1906 static void
1907 sched_call_null(
1908 __unused int type,
1909 __unused thread_t thread)
1910 {
1911 return;
1912 }
1913
1914 void
1915 thread_sched_call(
1916 thread_t thread,
1917 sched_call_t call)
1918 {
1919 thread->sched_call = (call != NULL)? call: sched_call_null;
1920 }
1921
1922 void
1923 thread_static_param(
1924 thread_t thread,
1925 boolean_t state)
1926 {
1927 thread_mtx_lock(thread);
1928 thread->static_param = state;
1929 thread_mtx_unlock(thread);
1930 }
1931
1932 uint64_t
1933 thread_tid(
1934 thread_t thread)
1935 {
1936 return (thread != THREAD_NULL? thread->thread_id: 0);
1937 }
1938
1939 uint16_t thread_set_tag(thread_t th, uint16_t tag) {
1940 return thread_set_tag_internal(th, tag);
1941 }
1942 uint16_t thread_get_tag(thread_t th) {
1943 return thread_get_tag_internal(th);
1944 }
1945
1946 uint64_t
1947 thread_dispatchqaddr(
1948 thread_t thread)
1949 {
1950 uint64_t dispatchqueue_addr = 0;
1951 uint64_t thread_handle = 0;
1952
1953 if (thread != THREAD_NULL) {
1954 thread_handle = thread->machine.cthread_self;
1955
1956 if (thread->task->bsd_info)
1957 dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
1958 }
1959
1960 return (dispatchqueue_addr);
1961 }
1962
1963 /*
1964 * Export routines to other components for things that are done as macros
1965 * within the osfmk component.
1966 */
1967
1968 #undef thread_reference
1969 void thread_reference(thread_t thread);
1970 void
1971 thread_reference(
1972 thread_t thread)
1973 {
1974 if (thread != THREAD_NULL)
1975 thread_reference_internal(thread);
1976 }
1977
1978 #undef thread_should_halt
1979
1980 boolean_t
1981 thread_should_halt(
1982 thread_t th)
1983 {
1984 return (thread_should_halt_fast(th));
1985 }
1986
1987 #if CONFIG_DTRACE
1988 uint32_t dtrace_get_thread_predcache(thread_t thread)
1989 {
1990 if (thread != THREAD_NULL)
1991 return thread->t_dtrace_predcache;
1992 else
1993 return 0;
1994 }
1995
1996 int64_t dtrace_get_thread_vtime(thread_t thread)
1997 {
1998 if (thread != THREAD_NULL)
1999 return thread->t_dtrace_vtime;
2000 else
2001 return 0;
2002 }
2003
2004 int64_t dtrace_get_thread_tracing(thread_t thread)
2005 {
2006 if (thread != THREAD_NULL)
2007 return thread->t_dtrace_tracing;
2008 else
2009 return 0;
2010 }
2011
2012 boolean_t dtrace_get_thread_reentering(thread_t thread)
2013 {
2014 if (thread != THREAD_NULL)
2015 return (thread->options & TH_OPT_DTRACE) ? TRUE : FALSE;
2016 else
2017 return 0;
2018 }
2019
2020 vm_offset_t dtrace_get_kernel_stack(thread_t thread)
2021 {
2022 if (thread != THREAD_NULL)
2023 return thread->kernel_stack;
2024 else
2025 return 0;
2026 }
2027
2028 int64_t dtrace_calc_thread_recent_vtime(thread_t thread)
2029 {
2030 if (thread != THREAD_NULL) {
2031 processor_t processor = current_processor();
2032 uint64_t abstime = mach_absolute_time();
2033 timer_t timer;
2034
2035 timer = PROCESSOR_DATA(processor, thread_timer);
2036
2037 return timer_grab(&(thread->system_timer)) + timer_grab(&(thread->user_timer)) +
2038 (abstime - timer->tstamp); /* XXX need interrupts off to prevent missed time? */
2039 } else
2040 return 0;
2041 }
2042
2043 void dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
2044 {
2045 if (thread != THREAD_NULL)
2046 thread->t_dtrace_predcache = predcache;
2047 }
2048
2049 void dtrace_set_thread_vtime(thread_t thread, int64_t vtime)
2050 {
2051 if (thread != THREAD_NULL)
2052 thread->t_dtrace_vtime = vtime;
2053 }
2054
2055 void dtrace_set_thread_tracing(thread_t thread, int64_t accum)
2056 {
2057 if (thread != THREAD_NULL)
2058 thread->t_dtrace_tracing = accum;
2059 }
2060
2061 void dtrace_set_thread_reentering(thread_t thread, boolean_t vbool)
2062 {
2063 if (thread != THREAD_NULL) {
2064 if (vbool)
2065 thread->options |= TH_OPT_DTRACE;
2066 else
2067 thread->options &= (~TH_OPT_DTRACE);
2068 }
2069 }
2070
2071 vm_offset_t dtrace_set_thread_recover(thread_t thread, vm_offset_t recover)
2072 {
2073 vm_offset_t prev = 0;
2074
2075 if (thread != THREAD_NULL) {
2076 prev = thread->recover;
2077 thread->recover = recover;
2078 }
2079 return prev;
2080 }
2081
2082 void dtrace_thread_bootstrap(void)
2083 {
2084 task_t task = current_task();
2085
2086 if (task->thread_count == 1) {
2087 thread_t thread = current_thread();
2088 if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) {
2089 thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS;
2090 DTRACE_PROC(exec__success);
2091 }
2092 DTRACE_PROC(start);
2093 }
2094 DTRACE_PROC(lwp__start);
2095
2096 }
2097
2098 void
2099 dtrace_thread_didexec(thread_t thread)
2100 {
2101 thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS;
2102 }
2103 #endif /* CONFIG_DTRACE */