]> git.saurik.com Git - apple/xnu.git/blob - osfmk/kern/syscall_subr.c
4d65fe2ae3c3bdb4ca5a0ba2b7089041ec3d3495
[apple/xnu.git] / osfmk / kern / syscall_subr.c
1 /*
2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57 #include <mach/boolean.h>
58 #include <mach/thread_switch.h>
59 #include <ipc/ipc_port.h>
60 #include <ipc/ipc_space.h>
61 #include <kern/counters.h>
62 #include <kern/ipc_kobject.h>
63 #include <kern/processor.h>
64 #include <kern/sched.h>
65 #include <kern/sched_prim.h>
66 #include <kern/spl.h>
67 #include <kern/task.h>
68 #include <kern/thread.h>
69 #include <kern/policy_internal.h>
70
71 #include <mach/policy.h>
72
73 #include <kern/syscall_subr.h>
74 #include <mach/mach_host_server.h>
75 #include <mach/mach_syscalls.h>
76 #include <sys/kdebug.h>
77 #include <kern/ast.h>
78
79 static void thread_depress_abstime(uint64_t interval);
80 static void thread_depress_ms(mach_msg_timeout_t interval);
81
82 /* Called from commpage to take a delayed preemption when exiting
83 * the "Preemption Free Zone" (PFZ).
84 */
85 kern_return_t
86 pfz_exit(
87 __unused struct pfz_exit_args *args)
88 {
89 /* For now, nothing special to do. We'll pick up the ASTs on kernel exit. */
90
91 return KERN_SUCCESS;
92 }
93
94
95 /*
96 * swtch and swtch_pri both attempt to context switch (logic in
97 * thread_block no-ops the context switch if nothing would happen).
98 * A boolean is returned that indicates whether there is anything
99 * else runnable. That's no excuse to spin, though.
100 */
101
102 static void
103 swtch_continue(void)
104 {
105 processor_t myprocessor;
106 boolean_t result;
107
108 disable_preemption();
109 myprocessor = current_processor();
110 result = SCHED(thread_should_yield)(myprocessor, current_thread());
111 enable_preemption();
112
113 ml_delay_on_yield();
114
115 thread_syscall_return(result);
116 /*NOTREACHED*/
117 }
118
119 boolean_t
120 swtch(
121 __unused struct swtch_args *args)
122 {
123 processor_t myprocessor;
124
125 disable_preemption();
126 myprocessor = current_processor();
127 if (!SCHED(thread_should_yield)(myprocessor, current_thread())) {
128 mp_enable_preemption();
129
130 return FALSE;
131 }
132 enable_preemption();
133
134 counter(c_swtch_block++);
135
136 thread_yield_with_continuation((thread_continue_t)swtch_continue, NULL);
137 }
138
139 static void
140 swtch_pri_continue(void)
141 {
142 processor_t myprocessor;
143 boolean_t result;
144
145 thread_depress_abort(current_thread());
146
147 disable_preemption();
148 myprocessor = current_processor();
149 result = SCHED(thread_should_yield)(myprocessor, current_thread());
150 mp_enable_preemption();
151
152 ml_delay_on_yield();
153
154 thread_syscall_return(result);
155 /*NOTREACHED*/
156 }
157
158 boolean_t
159 swtch_pri(
160 __unused struct swtch_pri_args *args)
161 {
162 processor_t myprocessor;
163
164 disable_preemption();
165 myprocessor = current_processor();
166 if (!SCHED(thread_should_yield)(myprocessor, current_thread())) {
167 mp_enable_preemption();
168
169 return FALSE;
170 }
171 enable_preemption();
172
173 counter(c_swtch_pri_block++);
174
175 thread_depress_abstime(thread_depress_time);
176
177 thread_yield_with_continuation((thread_continue_t)swtch_pri_continue, NULL);
178 }
179
180 static void
181 thread_switch_continue(void *parameter, __unused int ret)
182 {
183 thread_t self = current_thread();
184 int option = (int)(intptr_t)parameter;
185
186 if (option == SWITCH_OPTION_DEPRESS || option == SWITCH_OPTION_OSLOCK_DEPRESS) {
187 thread_depress_abort(self);
188 }
189
190 ml_delay_on_yield();
191
192 thread_syscall_return(KERN_SUCCESS);
193 /*NOTREACHED*/
194 }
195
196 /*
197 * thread_switch:
198 *
199 * Context switch. User may supply thread hint.
200 */
201 kern_return_t
202 thread_switch(
203 struct thread_switch_args *args)
204 {
205 thread_t thread = THREAD_NULL;
206 thread_t self = current_thread();
207 mach_port_name_t thread_name = args->thread_name;
208 int option = args->option;
209 mach_msg_timeout_t option_time = args->option_time;
210 uint32_t scale_factor = NSEC_PER_MSEC;
211 boolean_t depress_option = FALSE;
212 boolean_t wait_option = FALSE;
213 wait_interrupt_t interruptible = THREAD_ABORTSAFE;
214 port_to_thread_options_t ptt_options = PORT_TO_THREAD_NOT_CURRENT_THREAD;
215
216 /*
217 * Validate and process option.
218 *
219 * OSLock boosting only applies to other threads
220 * in your same task (even if you have a port for
221 * a thread in another task)
222 */
223 switch (option) {
224 case SWITCH_OPTION_NONE:
225 break;
226 case SWITCH_OPTION_WAIT:
227 wait_option = TRUE;
228 break;
229 case SWITCH_OPTION_DEPRESS:
230 depress_option = TRUE;
231 break;
232 case SWITCH_OPTION_DISPATCH_CONTENTION:
233 scale_factor = NSEC_PER_USEC;
234 wait_option = TRUE;
235 interruptible |= THREAD_WAIT_NOREPORT;
236 break;
237 case SWITCH_OPTION_OSLOCK_DEPRESS:
238 depress_option = TRUE;
239 interruptible |= THREAD_WAIT_NOREPORT;
240 ptt_options |= PORT_TO_THREAD_IN_CURRENT_TASK;
241 break;
242 case SWITCH_OPTION_OSLOCK_WAIT:
243 wait_option = TRUE;
244 interruptible |= THREAD_WAIT_NOREPORT;
245 ptt_options |= PORT_TO_THREAD_IN_CURRENT_TASK;
246 break;
247 default:
248 return KERN_INVALID_ARGUMENT;
249 }
250
251 /*
252 * Translate the port name if supplied.
253 */
254 if (thread_name != MACH_PORT_NULL) {
255 thread = port_name_to_thread(thread_name, ptt_options);
256 }
257
258 if (option == SWITCH_OPTION_OSLOCK_DEPRESS || option == SWITCH_OPTION_OSLOCK_WAIT) {
259 if (thread != THREAD_NULL) {
260 /*
261 * Attempt to kick the lock owner up to our same IO throttling tier.
262 * If the thread is currently blocked in throttle_lowpri_io(),
263 * it will immediately break out.
264 *
265 * TODO: SFI break out?
266 */
267 int new_policy = proc_get_effective_thread_policy(self, TASK_POLICY_IO);
268
269 set_thread_iotier_override(thread, new_policy);
270 }
271 }
272
273 /*
274 * Try to handoff if supplied.
275 */
276 if (thread != THREAD_NULL) {
277 spl_t s = splsched();
278
279 /* This may return a different thread if the target is pushing on something */
280 thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread);
281
282 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_THREAD_SWITCH) | DBG_FUNC_NONE,
283 thread_tid(thread), thread->state,
284 pulled_thread ? TRUE : FALSE, 0, 0);
285
286 if (pulled_thread != THREAD_NULL) {
287 /* We can't be dropping the last ref here */
288 thread_deallocate_safe(thread);
289
290 if (wait_option) {
291 assert_wait_timeout((event_t)assert_wait_timeout, interruptible,
292 option_time, scale_factor);
293 } else if (depress_option) {
294 thread_depress_ms(option_time);
295 }
296
297 thread_run(self, thread_switch_continue, (void *)(intptr_t)option, pulled_thread);
298 __builtin_unreachable();
299 }
300
301 splx(s);
302
303 thread_deallocate(thread);
304 }
305
306 if (wait_option) {
307 assert_wait_timeout((event_t)assert_wait_timeout, interruptible, option_time, scale_factor);
308 } else {
309 disable_preemption();
310 bool should_yield = SCHED(thread_should_yield)(current_processor(), current_thread());
311 enable_preemption();
312
313 if (should_yield == false) {
314 /* Early-return if yielding to the scheduler will not be beneficial */
315 return KERN_SUCCESS;
316 }
317
318 if (depress_option) {
319 thread_depress_ms(option_time);
320 }
321 }
322
323 thread_yield_with_continuation(thread_switch_continue, (void *)(intptr_t)option);
324 __builtin_unreachable();
325 }
326
327 void
328 thread_yield_with_continuation(
329 thread_continue_t continuation,
330 void *parameter)
331 {
332 assert(continuation);
333 thread_block_reason(continuation, parameter, AST_YIELD);
334 __builtin_unreachable();
335 }
336
337 /* This function is called after an assert_wait(), therefore it must not
338 * cause another wait until after the thread_run() or thread_block()
339 *
340 * Following are the calling convention for thread ref deallocation.
341 *
342 * 1) If no continuation is provided, then thread ref is consumed.
343 * (thread_handoff_deallocate convention).
344 *
345 * 2) If continuation is provided with option THREAD_HANDOFF_SETRUN_NEEDED
346 * then thread ref is always consumed.
347 *
348 * 3) If continuation is provided with option THREAD_HANDOFF_NONE then thread
349 * ref is not consumed and it is upto the continuation to deallocate
350 * the thread reference.
351 */
352 static wait_result_t
353 thread_handoff_internal(thread_t thread, thread_continue_t continuation,
354 void *parameter, thread_handoff_option_t option)
355 {
356 thread_t self = current_thread();
357
358 /*
359 * Try to handoff if supplied.
360 */
361 if (thread != THREAD_NULL) {
362 spl_t s = splsched();
363
364 thread_t pulled_thread = thread_prepare_for_handoff(thread, option);
365
366 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_THREAD_SWITCH) | DBG_FUNC_NONE,
367 thread_tid(thread), thread->state,
368 pulled_thread ? TRUE : FALSE, 0, 0);
369
370 /* Deallocate thread ref if needed */
371 if (continuation == NULL || (option & THREAD_HANDOFF_SETRUN_NEEDED)) {
372 /* Use the safe version of thread deallocate */
373 thread_deallocate_safe(thread);
374 }
375
376 if (pulled_thread != THREAD_NULL) {
377 int result = thread_run(self, continuation, parameter, pulled_thread);
378
379 splx(s);
380 return result;
381 }
382
383 splx(s);
384 }
385
386 int result = thread_block_parameter(continuation, parameter);
387 return result;
388 }
389
390 void
391 thread_handoff_parameter(thread_t thread, thread_continue_t continuation,
392 void *parameter, thread_handoff_option_t option)
393 {
394 thread_handoff_internal(thread, continuation, parameter, option);
395 panic("NULL continuation passed to %s", __func__);
396 __builtin_unreachable();
397 }
398
399 wait_result_t
400 thread_handoff_deallocate(thread_t thread, thread_handoff_option_t option)
401 {
402 return thread_handoff_internal(thread, NULL, NULL, option);
403 }
404
405 /*
406 * Thread depression
407 *
408 * This mechanism drops a thread to priority 0 in order for it to yield to
409 * all other runnable threads on the system. It can be canceled or timed out,
410 * whereupon the thread goes back to where it was.
411 *
412 * Note that TH_SFLAG_DEPRESS and TH_SFLAG_POLLDEPRESS are never set at the
413 * same time. DEPRESS always defers to POLLDEPRESS.
414 *
415 * DEPRESS only lasts across a single thread_block call, and never returns
416 * to userspace.
417 * POLLDEPRESS can be active anywhere up until thread termination.
418 */
419
420 /*
421 * Depress thread's priority to lowest possible for the specified interval,
422 * with an interval of zero resulting in no timeout being scheduled.
423 *
424 * Must block with AST_YIELD afterwards to take effect
425 */
426 void
427 thread_depress_abstime(uint64_t interval)
428 {
429 thread_t self = current_thread();
430
431 spl_t s = splsched();
432 thread_lock(self);
433
434 assert((self->sched_flags & TH_SFLAG_DEPRESS) == 0);
435
436 if ((self->sched_flags & TH_SFLAG_POLLDEPRESS) == 0) {
437 self->sched_flags |= TH_SFLAG_DEPRESS;
438 thread_recompute_sched_pri(self, SETPRI_LAZY);
439
440 if (interval != 0) {
441 uint64_t deadline;
442
443 clock_absolutetime_interval_to_deadline(interval, &deadline);
444 if (!timer_call_enter(&self->depress_timer, deadline, TIMER_CALL_USER_CRITICAL)) {
445 self->depress_timer_active++;
446 }
447 }
448 }
449
450 thread_unlock(self);
451 splx(s);
452 }
453
454 void
455 thread_depress_ms(mach_msg_timeout_t interval)
456 {
457 uint64_t abstime;
458
459 clock_interval_to_absolutetime_interval(interval, NSEC_PER_MSEC, &abstime);
460 thread_depress_abstime(abstime);
461 }
462
463 /*
464 * Priority depression expiration.
465 */
466 void
467 thread_depress_expire(void *p0,
468 __unused void *p1)
469 {
470 thread_t thread = (thread_t)p0;
471
472 spl_t s = splsched();
473 thread_lock(thread);
474
475 assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);
476
477 if (--thread->depress_timer_active == 0) {
478 thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
479 if ((thread->state & TH_RUN) == TH_RUN) {
480 thread->last_basepri_change_time = mach_absolute_time();
481 }
482 thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
483 }
484
485 thread_unlock(thread);
486 splx(s);
487 }
488
489 /*
490 * Prematurely abort priority depression if there is one.
491 */
492 kern_return_t
493 thread_depress_abort(thread_t thread)
494 {
495 kern_return_t result = KERN_NOT_DEPRESSED;
496
497 spl_t s = splsched();
498 thread_lock(thread);
499
500 assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);
501
502 /*
503 * User-triggered depress-aborts should not get out
504 * of the poll-depress, but they should cancel a regular depress.
505 */
506 if ((thread->sched_flags & TH_SFLAG_POLLDEPRESS) == 0) {
507 result = thread_depress_abort_locked(thread);
508 }
509
510 thread_unlock(thread);
511 splx(s);
512
513 return result;
514 }
515
516 /*
517 * Prematurely abort priority depression or poll depression if one is active.
518 * Called with the thread locked.
519 */
520 kern_return_t
521 thread_depress_abort_locked(thread_t thread)
522 {
523 if ((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) == 0) {
524 return KERN_NOT_DEPRESSED;
525 }
526
527 assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);
528
529 thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
530 if ((thread->state & TH_RUN) == TH_RUN) {
531 thread->last_basepri_change_time = mach_absolute_time();
532 }
533
534 thread_recompute_sched_pri(thread, SETPRI_LAZY);
535
536 if (timer_call_cancel(&thread->depress_timer)) {
537 thread->depress_timer_active--;
538 }
539
540 return KERN_SUCCESS;
541 }
542
543 /*
544 * Invoked as part of a polling operation like a no-timeout port receive
545 *
546 * Forces a fixpri thread to yield if it is detected polling without blocking for too long.
547 */
548 void
549 thread_poll_yield(thread_t self)
550 {
551 assert(self == current_thread());
552 assert((self->sched_flags & TH_SFLAG_DEPRESS) == 0);
553
554 if (self->sched_mode != TH_MODE_FIXED) {
555 return;
556 }
557
558 spl_t s = splsched();
559
560 uint64_t abstime = mach_absolute_time();
561 uint64_t total_computation = abstime -
562 self->computation_epoch + self->computation_metered;
563
564 if (total_computation >= max_poll_computation) {
565 thread_lock(self);
566
567 self->computation_epoch = abstime;
568 self->computation_metered = 0;
569
570 uint64_t yield_expiration = abstime +
571 (total_computation >> sched_poll_yield_shift);
572
573 if (!timer_call_enter(&self->depress_timer, yield_expiration,
574 TIMER_CALL_USER_CRITICAL)) {
575 self->depress_timer_active++;
576 }
577
578 self->sched_flags |= TH_SFLAG_POLLDEPRESS;
579 thread_recompute_sched_pri(self, SETPRI_DEFAULT);
580
581 thread_unlock(self);
582 }
583 splx(s);
584 }
585
586 /*
587 * Kernel-internal interface to yield for a specified period
588 *
589 * WARNING: Will still yield to priority 0 even if the thread is holding a contended lock!
590 */
591 void
592 thread_yield_internal(mach_msg_timeout_t ms)
593 {
594 thread_t self = current_thread();
595
596 assert((self->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);
597
598 processor_t myprocessor;
599
600 disable_preemption();
601 myprocessor = current_processor();
602 if (!SCHED(thread_should_yield)(myprocessor, self)) {
603 mp_enable_preemption();
604
605 return;
606 }
607 enable_preemption();
608
609 thread_depress_ms(ms);
610
611 thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_YIELD);
612
613 thread_depress_abort(self);
614 }
615
616 /*
617 * This yields to a possible non-urgent preemption pending on the current processor.
618 *
619 * This is useful when doing a long computation in the kernel without returning to userspace.
620 *
621 * As opposed to other yielding mechanisms, this does not drop the priority of the current thread.
622 */
623 void
624 thread_yield_to_preemption()
625 {
626 /*
627 * ast_pending() should ideally be called with interrupts disabled, but
628 * the check here is fine because csw_check() will do the right thing.
629 */
630 ast_t *pending_ast = ast_pending();
631 ast_t ast = AST_NONE;
632 processor_t p;
633
634 if (*pending_ast & AST_PREEMPT) {
635 thread_t self = current_thread();
636
637 spl_t s = splsched();
638
639 p = current_processor();
640 thread_lock(self);
641 ast = csw_check(self, p, AST_YIELD);
642 ast_on(ast);
643 thread_unlock(self);
644
645 if (ast != AST_NONE) {
646 (void)thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast);
647 }
648
649 splx(s);
650 }
651 }